[Midnightbsd-cvs] src [9897] trunk/sys/ufs/ufs: sync with freebsd 10-stable
laffer1 at midnightbsd.org
Thu May 24 18:29:09 EDT 2018
Revision: 9897
http://svnweb.midnightbsd.org/src/?rev=9897
Author: laffer1
Date: 2018-05-24 18:29:08 -0400 (Thu, 24 May 2018)
Log Message:
-----------
sync with freebsd 10-stable
Modified Paths:
--------------
trunk/sys/ufs/ffs/ffs_alloc.c
trunk/sys/ufs/ffs/ffs_balloc.c
trunk/sys/ufs/ffs/ffs_extern.h
trunk/sys/ufs/ffs/ffs_inode.c
trunk/sys/ufs/ffs/ffs_rawread.c
trunk/sys/ufs/ffs/ffs_snapshot.c
trunk/sys/ufs/ffs/ffs_softdep.c
trunk/sys/ufs/ffs/ffs_subr.c
trunk/sys/ufs/ffs/ffs_suspend.c
trunk/sys/ufs/ffs/ffs_tables.c
trunk/sys/ufs/ffs/ffs_vfsops.c
trunk/sys/ufs/ffs/ffs_vnops.c
trunk/sys/ufs/ffs/fs.h
trunk/sys/ufs/ffs/softdep.h
trunk/sys/ufs/ufs/README.acls
trunk/sys/ufs/ufs/README.extattr
trunk/sys/ufs/ufs/acl.h
trunk/sys/ufs/ufs/dinode.h
trunk/sys/ufs/ufs/dir.h
trunk/sys/ufs/ufs/dirhash.h
trunk/sys/ufs/ufs/extattr.h
trunk/sys/ufs/ufs/gjournal.h
trunk/sys/ufs/ufs/inode.h
trunk/sys/ufs/ufs/quota.h
trunk/sys/ufs/ufs/ufs_acl.c
trunk/sys/ufs/ufs/ufs_bmap.c
trunk/sys/ufs/ufs/ufs_dirhash.c
trunk/sys/ufs/ufs/ufs_extattr.c
trunk/sys/ufs/ufs/ufs_extern.h
trunk/sys/ufs/ufs/ufs_gjournal.c
trunk/sys/ufs/ufs/ufs_inode.c
trunk/sys/ufs/ufs/ufs_lookup.c
trunk/sys/ufs/ufs/ufs_quota.c
trunk/sys/ufs/ufs/ufs_vfsops.c
trunk/sys/ufs/ufs/ufs_vnops.c
trunk/sys/ufs/ufs/ufsmount.h
Property Changed:
----------------
trunk/sys/ufs/ufs/README.acls
trunk/sys/ufs/ufs/README.extattr
Modified: trunk/sys/ufs/ffs/ffs_alloc.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_alloc.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_alloc.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -61,12 +61,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/ufs/ffs/ffs_alloc.c 248667 2013-03-23 22:41:48Z kib $");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_alloc.c 306630 2016-10-03 10:15:16Z kib $");
#include "opt_quota.h"
#include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
@@ -113,8 +113,7 @@
#ifdef INVARIANTS
static int ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
-static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int,
- int);
+static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int);
static ino_t ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t,
int, int);
@@ -255,17 +254,18 @@
struct buf *bp;
struct ufsmount *ump;
u_int cg, request, reclaimed;
- int error;
+ int error, gbflags;
ufs2_daddr_t bno;
static struct timeval lastfail;
static int curfail;
int64_t delta;
- *bpp = 0;
vp = ITOV(ip);
fs = ip->i_fs;
bp = NULL;
ump = ip->i_ump;
+ gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
+
mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
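A note on the gbflags line above, since the same translation recurs throughout this commit: callers request unmapped buffers with the balloc-layer flag BA_UNMAPPED, and the allocation paths convert that into the buffer-cache flag GB_UNMAPPED before calling getblk()/bread_gb(). A minimal sketch of the conversion (the helper name is illustrative; the tree open-codes the expression):

    static int
    balloc_gbflags(int baflags)
    {
            /* GB_UNMAPPED lets the buffer stay unmapped in kernel VA */
            return ((baflags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0);
    }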
@@ -297,7 +297,7 @@
/*
* Allocate the extra space in the buffer.
*/
- error = bread(vp, lbprev, osize, NOCRED, &bp);
+ error = bread_gb(vp, lbprev, osize, NOCRED, gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -319,6 +319,7 @@
/*
* Check for extension in the existing location.
*/
+ *bpp = NULL;
cg = dtog(fs, bprev);
UFS_LOCK(ump);
bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
@@ -333,7 +334,7 @@
ip->i_flag |= IN_CHANGE | IN_UPDATE;
allocbuf(bp, nsize);
bp->b_flags |= B_DONE;
- bzero(bp->b_data + osize, nsize - osize);
+ vfs_bio_bzero_buf(bp, osize, nsize - osize);
if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
vfs_bio_set_valid(bp, osize, nsize - osize);
*bpp = bp;
@@ -401,7 +402,7 @@
ip->i_flag |= IN_CHANGE | IN_UPDATE;
allocbuf(bp, nsize);
bp->b_flags |= B_DONE;
- bzero(bp->b_data + osize, nsize - osize);
+ vfs_bio_bzero_buf(bp, osize, nsize - osize);
if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
vfs_bio_set_valid(bp, osize, nsize - osize);
*bpp = bp;
@@ -459,11 +460,17 @@
SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");
static int doasyncfree = 1;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
+"do not force synchronous writes when blocks are reallocated");
static int doreallocblks = 1;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0,
+"enable block reallocation");
+static int maxclustersearch = 10;
+SYSCTL_INT(_vfs_ffs, OID_AUTO, maxclustersearch, CTLFLAG_RW, &maxclustersearch,
+0, "max number of cylinder group to search for contigous blocks");
+
#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif
@@ -502,7 +509,7 @@
struct inode *ip;
struct vnode *vp;
struct buf *sbp, *ebp;
- ufs1_daddr_t *bap, *sbap, *ebap = 0;
+ ufs1_daddr_t *bap, *sbap, *ebap;
struct cluster_save *buflist;
struct ufsmount *ump;
ufs_lbn_t start_lbn, end_lbn;
@@ -509,13 +516,19 @@
ufs1_daddr_t soff, newblk, blkno;
ufs2_daddr_t pref;
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
- int i, len, start_lvl, end_lvl, ssize;
+ int i, cg, len, start_lvl, end_lvl, ssize;
vp = ap->a_vp;
ip = VTOI(vp);
fs = ip->i_fs;
ump = ip->i_ump;
- if (fs->fs_contigsumsize <= 0)
+ /*
+ * If we are not tracking block clusters or if we have less than 4%
+ * free blocks left, then do not attempt to cluster. Running with
+ * less than 5% free block reserve is not recommended and those that
+ * choose to do so do not expect to have good file layout.
+ */
+ if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0)
return (ENOSPC);
buflist = ap->a_buflist;
len = buflist->bs_nchildren;
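The freespace(fs, 4) test added above compares the superblock's free-block totals against a percentage reserve; a negative result means the filesystem is below a 4% reserve and clustering is skipped with ENOSPC. A simplified sketch of the computation, with illustrative names (the authoritative macro lives in sys/ufs/ffs/fs.h):

    /* Fragments still free once a pct% reserve of the data area is
     * held back; a negative result means we are below the reserve. */
    static long
    frags_above_reserve(long free_frags, long data_frags, int pct)
    {
            return (free_frags - data_frags * pct / 100);
    }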
@@ -576,6 +589,7 @@
/*
* If the block range spans two block maps, get the second map.
*/
+ ebap = NULL;
if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
ssize = len;
} else {
@@ -590,18 +604,39 @@
ebap = (ufs1_daddr_t *)ebp->b_data;
}
/*
- * Find the preferred location for the cluster.
+ * Find the preferred location for the cluster. If we have not
+ * previously failed at this endeavor, then follow our standard
+ * preference calculation. If we have failed at it, then pick up
+ * where we last ended our search.
*/
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
+ if (ip->i_nextclustercg == -1)
+ pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
+ else
+ pref = cgdata(fs, ip->i_nextclustercg);
/*
* Search the block map looking for an allocation of the desired size.
+ * To avoid wasting too much time, we limit the number of cylinder
+ * groups that we will search.
*/
- if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
- len, len, ffs_clusteralloc)) == 0) {
+ cg = dtog(fs, pref);
+ for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
+ if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
+ break;
+ cg += 1;
+ if (cg >= fs->fs_ncg)
+ cg = 0;
+ }
+ /*
+ * If we have failed in our search, record where we gave up for
+ * next time. Otherwise, fall back to our usual search criterion.
+ */
+ if (newblk == 0) {
+ ip->i_nextclustercg = cg;
UFS_UNLOCK(ump);
goto fail;
}
+ ip->i_nextclustercg = -1;
/*
* We have found a new contiguous block.
*
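The search loop added in this hunk replaces the old one-shot ffs_hashalloc() call with a bounded scan that wraps around the last cylinder group, and i_nextclustercg persists the give-up point across calls. A self-contained sketch of the pattern (try_alloc stands in for ffs_clusteralloc()):

    typedef long (*cg_try_fn)(int cg, int len);

    static long
    bounded_cg_search(cg_try_fn try_alloc, int start_cg, int ncg,
        int limit, int len, int *resume_cg)
    {
            long blkno;
            int cg, i;

            cg = start_cg;
            for (i = (limit < ncg ? limit : ncg); i > 0; i--) {
                    if ((blkno = try_alloc(cg, len)) != 0)
                            return (blkno); /* len contiguous blocks */
                    if (++cg >= ncg)
                            cg = 0;         /* wrap past the last group */
            }
            *resume_cg = cg;                /* remember where we gave up */
            return (0);
    }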
@@ -611,7 +646,8 @@
*/
#ifdef DEBUG
if (prtrealloc)
- printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
+ printf("realloc: ino %ju, lbns %jd-%jd\n\told:",
+ (uintmax_t)ip->i_number,
(intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
blkno = newblk;
@@ -723,19 +759,25 @@
struct inode *ip;
struct vnode *vp;
struct buf *sbp, *ebp;
- ufs2_daddr_t *bap, *sbap, *ebap = 0;
+ ufs2_daddr_t *bap, *sbap, *ebap;
struct cluster_save *buflist;
struct ufsmount *ump;
ufs_lbn_t start_lbn, end_lbn;
ufs2_daddr_t soff, newblk, blkno, pref;
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
- int i, len, start_lvl, end_lvl, ssize;
+ int i, cg, len, start_lvl, end_lvl, ssize;
vp = ap->a_vp;
ip = VTOI(vp);
fs = ip->i_fs;
ump = ip->i_ump;
- if (fs->fs_contigsumsize <= 0)
+ /*
+ * If we are not tracking block clusters or if we have less than 4%
+ * free blocks left, then do not attempt to cluster. Running with
+ * less than 5% free block reserve is not recommended and those that
+ * choose to do so do not expect to have good file layout.
+ */
+ if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0)
return (ENOSPC);
buflist = ap->a_buflist;
len = buflist->bs_nchildren;
@@ -796,6 +838,7 @@
/*
* If the block range spans two block maps, get the second map.
*/
+ ebap = NULL;
if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
ssize = len;
} else {
@@ -810,18 +853,39 @@
ebap = (ufs2_daddr_t *)ebp->b_data;
}
/*
- * Find the preferred location for the cluster.
+ * Find the preferred location for the cluster. If we have not
+ * previously failed at this endeavor, then follow our standard
+ * preference calculation. If we have failed at it, then pick up
+ * where we last ended our search.
*/
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
+ if (ip->i_nextclustercg == -1)
+ pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
+ else
+ pref = cgdata(fs, ip->i_nextclustercg);
/*
* Search the block map looking for an allocation of the desired size.
+ * To avoid wasting too much time, we limit the number of cylinder
+ * groups that we will search.
*/
- if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
- len, len, ffs_clusteralloc)) == 0) {
+ cg = dtog(fs, pref);
+ for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
+ if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
+ break;
+ cg += 1;
+ if (cg >= fs->fs_ncg)
+ cg = 0;
+ }
+ /*
+ * If we have failed in our search, record where we gave up for
+ * next time. Otherwise, fall back to our usual search criterion.
+ */
+ if (newblk == 0) {
+ ip->i_nextclustercg = cg;
UFS_UNLOCK(ump);
goto fail;
}
+ ip->i_nextclustercg = -1;
/*
* We have found a new contiguous block.
*
@@ -1168,14 +1232,14 @@
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
fs->fs_cs(fs, cg).cs_nifree >= minifree &&
- fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
if (fs->fs_contigdirs[cg] < maxcontigdirs)
return ((ino_t)(fs->fs_ipg * cg));
}
- for (cg = prefcg - 1; cg >= 0; cg--)
+ for (cg = 0; cg < prefcg; cg++)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
fs->fs_cs(fs, cg).cs_nifree >= minifree &&
- fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
if (fs->fs_contigdirs[cg] < maxcontigdirs)
return ((ino_t)(fs->fs_ipg * cg));
}
@@ -1185,7 +1249,7 @@
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
return ((ino_t)(fs->fs_ipg * cg));
- for (cg = prefcg - 1; cg >= 0; cg--)
+ for (cg = 0; cg < prefcg; cg++)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
break;
return ((ino_t)(fs->fs_ipg * cg));
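The two hunks above change ffs_dirpref()'s fallback scans from walking backward (prefcg - 1 down to 0) to walking forward from group 0, so both passes now sweep in the same direction. The revised two-pass shape, with ok() as a hypothetical stand-in for the cs_nifree/cs_nbfree tests:

    static int
    two_pass_scan(int prefcg, int ncg, int (*ok)(int))
    {
            int cg;

            for (cg = prefcg; cg < ncg; cg++)       /* prefcg .. ncg-1 */
                    if (ok(cg))
                            return (cg);
            for (cg = 0; cg < prefcg; cg++)         /* 0 .. prefcg-1 */
                    if (ok(cg))
                            return (cg);
            return (-1);
    }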
@@ -1194,7 +1258,8 @@
/*
* Select the desired position for the next block in a file. The file is
* logically divided into sections. The first section is composed of the
- * direct blocks. Each additional section contains fs_maxbpg blocks.
+ * direct blocks and the next fs_maxbpg blocks. Each additional section
+ * contains fs_maxbpg blocks.
*
* If no blocks have been allocated in the first section, the policy is to
* request a block in the same cylinder group as the inode that describes
@@ -1212,14 +1277,12 @@
* cylinder group from which the previous allocation was made. The sweep
* continues until a cylinder group with greater than the average number
* of free blocks is found. If the allocation is for the first block in an
- * indirect block, the information on the previous allocation is unavailable;
- * here a best guess is made based upon the logical block number being
- * allocated.
+ * indirect block or the previous block is a hole, then the information on
+ * the previous allocation is unavailable; here a best guess is made based
+ * on the logical block number being allocated.
*
* If a section is already partially allocated, the policy is to
- * contiguously allocate fs_maxcontig blocks. The end of one of these
- * contiguous blocks and the beginning of the next is laid out
- * contiguously if possible.
+ * allocate blocks contiguously within the section if possible.
*/
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
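Under the reworded comment, section 0 now spans the direct blocks plus the first fs_maxbpg blocks, and every later section holds fs_maxbpg blocks. A worked sketch of that division (illustrative arithmetic encoding the comment, not code from the tree):

    /* Section 0: lbn < NDADDR + maxbpg; later sections hold maxbpg. */
    static long
    file_section(long lbn, long nddir /* NDADDR */, long maxbpg)
    {
            if (lbn < nddir + maxbpg)
                    return (0);
            return (1 + (lbn - nddir - maxbpg) / maxbpg);
    }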
@@ -1708,7 +1771,7 @@
cgp = (struct cg *)bp->b_data;
blksfree = cg_blksfree(cgp);
if (bpref == 0) {
- bpref = cgp->cg_rotor;
+ bpref = cgbase(fs, cgp->cg_cgx) + cgp->cg_rotor + fs->fs_frag;
} else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) {
/* map bpref to correct zone in this cg */
if (bpref < cgdata(fs, cgbpref))
@@ -1772,12 +1835,11 @@
* take the first one that we find following bpref.
*/
static ufs2_daddr_t
-ffs_clusteralloc(ip, cg, bpref, len, unused)
+ffs_clusteralloc(ip, cg, bpref, len)
struct inode *ip;
u_int cg;
ufs2_daddr_t bpref;
int len;
- int unused;
{
struct fs *fs;
struct cg *cgp;
@@ -1920,9 +1982,9 @@
struct cg *cgp;
struct buf *bp, *ibp;
struct ufsmount *ump;
- u_int8_t *inosused;
+ u_int8_t *inosused, *loc;
struct ufs2_dinode *dp2;
- int error, start, len, loc, map, i;
+ int error, start, len, i;
u_int32_t old_initediblk;
fs = ip->i_fs;
@@ -1954,12 +2016,12 @@
}
start = cgp->cg_irotor / NBBY;
len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
- loc = skpc(0xff, len, &inosused[start]);
- if (loc == 0) {
+ loc = memcchr(&inosused[start], 0xff, len);
+ if (loc == NULL) {
len = start + 1;
start = 0;
- loc = skpc(0xff, len, &inosused[0]);
- if (loc == 0) {
+ loc = memcchr(&inosused[start], 0xff, len);
+ if (loc == NULL) {
printf("cg = %d, irotor = %ld, fs = %s\n",
cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
panic("ffs_nodealloccg: map corrupted");
@@ -1966,13 +2028,7 @@
/* NOTREACHED */
}
}
- i = start + len - loc;
- map = inosused[i] ^ 0xff;
- if (map == 0) {
- printf("fs = %s\n", fs->fs_fsmnt);
- panic("ffs_nodealloccg: block not in map");
- }
- ipref = i * NBBY + ffs(map) - 1;
+ ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1;
gotit:
/*
* Check to see if we need to initialize more inodes.
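The memcchr() conversion above drops the old skpc() scan and its manual byte recheck: memcchr(p, 0xff, len) returns a pointer to the first byte that is not 0xff (or NULL if all bytes match), and ffs(~*loc) then finds the first clear bit inside that byte. A self-contained sketch (memcchr() is a FreeBSD routine, not standard C):

    #include <string.h>     /* memcchr() on FreeBSD */
    #include <strings.h>    /* ffs() */

    #define NBBY    8       /* bits per byte */

    static int
    first_free_inode(unsigned char *inosused, int len)
    {
            unsigned char *loc;

            loc = memcchr(inosused, 0xff, len);
            if (loc == NULL)
                    return (-1);    /* every inode in the range is used */
            return ((loc - inosused) * NBBY + ffs(~*loc) - 1);
    }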
@@ -2101,12 +2157,13 @@
/* devvp is a snapshot */
dev = VTOI(devvp)->i_devvp->v_rdev;
cgblkno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
dev = devvp->v_rdev;
cgblkno = fsbtodb(fs, cgtod(fs, cg));
ASSERT_VOP_LOCKED(devvp, "ffs_blkfree_cg");
- }
+ } else
+ return;
#ifdef INVARIANTS
if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
@@ -2200,14 +2257,12 @@
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
mp = UFSTOVFS(ump);
- if (MOUNTEDSOFTDEP(mp) && devvp->v_type != VREG)
+ if (MOUNTEDSOFTDEP(mp) && devvp->v_type == VCHR)
softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
numfrags(fs, size), dephd);
bdwrite(bp);
}
-TASKQUEUE_DEFINE_THREAD(ffs_trim);
-
struct ffs_blkfree_trim_params {
struct task task;
struct ufsmount *ump;
@@ -2230,6 +2285,7 @@
ffs_blkfree_cg(tp->ump, tp->ump->um_fs, tp->devvp, tp->bno, tp->size,
tp->inum, tp->pdephd);
vn_finished_secondary_write(UFSTOVFS(tp->ump));
+ atomic_add_int(&tp->ump->um_trim_inflight, -1);
free(tp, M_TEMP);
}
@@ -2242,7 +2298,7 @@
tp = bip->bio_caller2;
g_destroy_bio(bip);
TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp);
- taskqueue_enqueue(taskqueue_ffs_trim, &tp->task);
+ taskqueue_enqueue(tp->ump->um_trim_tq, &tp->task);
}
void
@@ -2266,7 +2322,7 @@
* it has a snapshot(s) associated with it, and one of the
* snapshots wants to claim the block.
*/
- if (devvp->v_type != VREG &&
+ if (devvp->v_type == VCHR &&
(devvp->v_vflag & VV_COPYONWRITE) &&
ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd)) {
return;
@@ -2286,6 +2342,7 @@
* reordering, TRIM might be issued after we reuse the block
* and write some new data into it.
*/
+ atomic_add_int(&ump->um_trim_inflight, 1);
tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK);
tp->ump = ump;
tp->devvp = devvp;
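The um_trim_inflight increment here pairs with the decrement added earlier in ffs_blkfree_trim_task(), so the mount can account for TRIM work still in flight (and, with the per-mount um_trim_tq, wait it out). A userland analogue of the accounting pattern, using C11 atomics in place of the kernel's atomic_add_int():

    #include <stdatomic.h>

    static atomic_int trim_inflight;

    static void
    trim_enqueue(void)
    {
            atomic_fetch_add(&trim_inflight, 1);    /* before enqueue */
            /* ... hand the request to the worker thread ... */
    }

    static void
    trim_task_done(void)
    {
            /* ... the free has been processed; release the slot ... */
            atomic_fetch_sub(&trim_inflight, 1);
    }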
@@ -2408,14 +2465,17 @@
/* devvp is a snapshot */
dev = VTOI(devvp)->i_devvp->v_rdev;
cgbno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
dev = devvp->v_rdev;
cgbno = fsbtodb(fs, cgtod(fs, cg));
+ } else {
+ bp = NULL;
+ return (0);
}
if (ino >= fs->fs_ipg * fs->fs_ncg)
- panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s",
- devtoname(dev), (u_long)ino, fs->fs_fsmnt);
+ panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s",
+ devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt);
if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
brelse(bp);
return (error);
@@ -2430,8 +2490,8 @@
inosused = cg_inosused(cgp);
ino %= fs->fs_ipg;
if (isclr(inosused, ino)) {
- printf("dev = %s, ino = %u, fs = %s\n", devtoname(dev),
- ino + cg * fs->fs_ipg, fs->fs_fsmnt);
+ printf("dev = %s, ino = %ju, fs = %s\n", devtoname(dev),
+ (uintmax_t)(ino + cg * fs->fs_ipg), fs->fs_fsmnt);
if (fs->fs_ronly == 0)
panic("ffs_freefile: freeing free inode");
}
@@ -2450,7 +2510,7 @@
fs->fs_fmod = 1;
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
- if (MOUNTEDSOFTDEP(UFSTOVFS(ump)) && devvp->v_type != VREG)
+ if (MOUNTEDSOFTDEP(UFSTOVFS(ump)) && devvp->v_type == VCHR)
softdep_setup_inofree(UFSTOVFS(ump), bp,
ino + cg * fs->fs_ipg, wkhd);
bdwrite(bp);
@@ -2477,9 +2537,11 @@
if (devvp->v_type == VREG) {
/* devvp is a snapshot */
cgbno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
cgbno = fsbtodb(fs, cgtod(fs, cg));
+ } else {
+ return (1);
}
if (ino >= fs->fs_ipg * fs->fs_ncg)
return (1);
@@ -2581,8 +2643,9 @@
struct thread *td = curthread; /* XXX */
struct proc *p = td->td_proc;
- log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n",
- p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp);
+ log(LOG_ERR, "pid %d (%s), uid %d inumber %ju on %s: %s\n",
+ p->p_pid, p->p_comm, td->td_ucred->cr_uid, (uintmax_t)inum,
+ fs->fs_fsmnt, cp);
}
/*
@@ -2701,7 +2764,8 @@
long blkcnt, blksize;
struct filedesc *fdp;
struct file *fp, *vfp;
- int vfslocked, filetype, error;
+ cap_rights_t rights;
+ int filetype, error;
static struct fileops *origops, bufferedops;
if (req->newlen > sizeof cmd)
@@ -2710,8 +2774,8 @@
return (error);
if (cmd.version != FFS_CMD_VERSION)
return (ERPCMISMATCH);
- if ((error = getvnode(td->td_proc->p_fd, cmd.handle, CAP_FSCK,
- &fp)) != 0)
+ if ((error = getvnode(td->td_proc->p_fd, cmd.handle,
+ cap_rights_init(&rights, CAP_FSCK), &fp)) != 0)
return (error);
vp = fp->f_data;
if (vp->v_type != VREG && vp->v_type != VDIR) {
@@ -2719,7 +2783,8 @@
return (EINVAL);
}
vn_start_write(vp, &mp, V_WAIT);
- if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
+ if (mp == NULL ||
+ strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
vn_finished_write(mp);
fdrop(fp, td);
return (EINVAL);
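The getvnode() calls in this file move from a bare CAP_FSCK constant to the capsicum cap_rights_t API. cap_rights_init() is variadic and returns its first argument, which is why it can be passed inline; a sketch of the same handoff (the wrapper name is illustrative):

    static int
    fsck_handle_to_file(struct thread *td, int fd, struct file **fpp)
    {
            cap_rights_t rights;

            return (getvnode(td->td_proc->p_fd, fd,
                cap_rights_init(&rights, CAP_FSCK), fpp));
    }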
@@ -2794,16 +2859,16 @@
#ifdef DEBUG
if (fsckcmds) {
if (cmd.size == 1)
- printf("%s: free %s inode %d\n",
+ printf("%s: free %s inode %ju\n",
mp->mnt_stat.f_mntonname,
filetype == IFDIR ? "directory" : "file",
- (ino_t)cmd.value);
+ (uintmax_t)cmd.value);
else
- printf("%s: free %s inodes %d-%d\n",
+ printf("%s: free %s inodes %ju-%ju\n",
mp->mnt_stat.f_mntonname,
filetype == IFDIR ? "directory" : "file",
- (ino_t)cmd.value,
- (ino_t)(cmd.value + cmd.size - 1));
+ (uintmax_t)cmd.value,
+ (uintmax_t)(cmd.value + cmd.size - 1));
}
#endif /* DEBUG */
while (cmd.size > 0) {
@@ -2906,23 +2971,18 @@
#endif /* DEBUG */
if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_SHARED, &vp)))
break;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
AUDIT_ARG_VNODE1(vp);
if ((error = change_dir(vp, td)) != 0) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
break;
}
VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
fdp = td->td_proc->p_fd;
FILEDESC_XLOCK(fdp);
vpold = fdp->fd_cdir;
fdp->fd_cdir = vp;
FILEDESC_XUNLOCK(fdp);
- vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
vrele(vpold);
- VFS_UNLOCK_GIANT(vfslocked);
break;
case FFS_SET_DOTDOT:
@@ -2995,7 +3055,6 @@
#endif /* DEBUG */
if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
break;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
AUDIT_ARG_VNODE1(vp);
ip = VTOI(vp);
if (ip->i_ump->um_fstype == UFS1)
@@ -3006,13 +3065,11 @@
sizeof(struct ufs2_dinode));
if (error) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
break;
}
ip->i_flag |= IN_CHANGE | IN_MODIFIED;
error = ffs_update(vp, 1);
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
break;
case FFS_SET_BUFOUTPUT:
@@ -3033,7 +3090,7 @@
}
#endif /* DEBUG */
if ((error = getvnode(td->td_proc->p_fd, cmd.value,
- CAP_FSCK, &vfp)) != 0)
+ cap_rights_init(&rights, CAP_FSCK), &vfp)) != 0)
break;
if (vfp->f_vnode->v_type != VCHR) {
fdrop(vfp, td);
@@ -3090,7 +3147,7 @@
struct buf *bp;
struct fs *fs;
struct filedesc *fdp;
- int error, vfslocked;
+ int error;
daddr_t lbn;
/*
@@ -3107,7 +3164,6 @@
vp = fdp->fd_cdir;
vref(vp);
FILEDESC_SUNLOCK(fdp);
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_lock(vp, LK_SHARED | LK_RETRY);
/*
* Check that the current directory vnode indeed belongs to
@@ -3115,18 +3171,15 @@
*/
if (vp->v_op != &ffs_vnodeops1 && vp->v_op != &ffs_vnodeops2) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
return (EINVAL);
}
ip = VTOI(vp);
if (ip->i_devvp != devvp) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
return (EINVAL);
}
fs = ip->i_fs;
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
foffset_lock_uio(fp, uio, flags);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
#ifdef DEBUG
Modified: trunk/sys/ufs/ffs/ffs_balloc.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_balloc.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_balloc.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Networks Associates Technology, Inc.
* All rights reserved.
@@ -60,7 +61,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_balloc.c 304672 2016-08-23 07:55:32Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -107,7 +108,7 @@
int saved_inbdflush;
static struct timeval lastfail;
static int curfail;
- int reclaimed;
+ int gbflags, reclaimed;
ip = VTOI(vp);
dp = ip->i_din1;
@@ -123,6 +124,7 @@
return (EOPNOTSUPP);
if (lbn < 0)
return (EFBIG);
+ gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
if (DOINGSOFTDEP(vp))
softdep_prealloc(vp, MNT_WAIT);
@@ -211,7 +213,7 @@
nsize, flags, cred, &newb);
if (error)
return (error);
- bp = getblk(vp, lbn, nsize, 0, 0, 0);
+ bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, newb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(bp);
@@ -247,7 +249,7 @@
UFS_LOCK(ump);
pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
(ufs1_daddr_t *)0);
- if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+ if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) {
curthread_pflags_restore(saved_inbdflush);
return (error);
@@ -254,9 +256,11 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn;
- bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
+ bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, nb);
vfs_bio_clrbuf(bp);
if (DOINGSOFTDEP(vp)) {
@@ -298,6 +302,10 @@
continue;
}
UFS_LOCK(ump);
+ /*
+ * If parent indirect has just been allocated, try to cluster
+ * immediately following it.
+ */
if (pref == 0)
pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
(ufs1_daddr_t *)0);
@@ -304,7 +312,7 @@
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -320,6 +328,8 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[i].in_lbn;
nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
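The MPASS() assertions added before each *allocblk++/*lbns_remfree++ store guard the allociblk[] and lbns[] cursors against overrun when INVARIANTS is enabled. A userland sketch of the same bounds check, with assert() standing in for MPASS():

    #include <assert.h>
    #include <stddef.h>

    #define nitems(x)       (sizeof(x) / sizeof((x)[0]))

    static void
    append_block(long **cursor, long *base, size_t cap, long nb)
    {
            assert(*cursor < base + cap);   /* MPASS() in the kernel */
            *(*cursor)++ = nb;
    }

    /* e.g. append_block(&allocblk, allociblk, nitems(allociblk), nb); */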
@@ -367,7 +377,14 @@
*/
if (nb == 0) {
UFS_LOCK(ump);
- if (pref == 0)
+ /*
+ * If allocating metadata at the front of the cylinder
+ * group and parent indirect block has just been allocated,
+ * then cluster next to it if it is the first indirect in
+ * the file. Otherwise it has been allocated in the metadata
+ * area, so we want to find our own place out in the data area.
+ */
+ if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
&bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
@@ -374,7 +391,7 @@
flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -389,9 +406,11 @@
goto fail;
}
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = lbn;
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(nbp);
@@ -417,12 +436,15 @@
brelse(bp);
if (flags & BA_CLRBUF) {
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
- if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+ if (seqcount != 0 &&
+ (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
+ !(vm_page_count_severe() || buf_dirty_count_severe())) {
error = cluster_read(vp, ip->i_size, lbn,
(int)fs->fs_bsize, NOCRED,
- MAXBSIZE, seqcount, &nbp);
+ MAXBSIZE, seqcount, gbflags, &nbp);
} else {
- error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
+ gbflags, &nbp);
}
if (error) {
brelse(nbp);
@@ -429,7 +451,7 @@
goto fail;
}
} else {
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
}
curthread_pflags_restore(saved_inbdflush);
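The BA_CLRBUF read path above now also backs off from clustered read-ahead when the VM or buffer subsystem is under severe pressure, not just when clustering is disabled or the access is non-sequential. A sketch of the decision (the *_severe parameters stand in for vm_page_count_severe() and buf_dirty_count_severe()):

    enum read_path { READ_CLUSTER, READ_PLAIN };

    static enum read_path
    pick_read_path(int seqcount, int nocluster, int vm_severe,
        int dirty_severe)
    {
            if (seqcount != 0 && !nocluster &&
                !(vm_severe || dirty_severe))
                    return (READ_CLUSTER);  /* cluster_read(...) */
            return (READ_PLAIN);            /* bread_gb(...) */
    }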
@@ -451,7 +473,7 @@
* We have to sync it at the end so that the soft updates code
* does not find any untracked changes. Although this is really
* slow, running out of disk space is not expected to be a common
- * occurence. The error return from fsync is ignored as we already
+ * occurrence. The error return from fsync is ignored as we already
* have an error to return to the user.
*
* XXX Still have to journal the free below
@@ -463,10 +485,16 @@
* We shall not leave the freed blocks on the vnode
* buffer object lists.
*/
- bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
if (bp != NULL) {
- bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
+ KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+ ("mismatch1 l %jd %jd b %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
+ (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp)));
+ bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~(B_ASYNC | B_CACHE);
brelse(bp);
}
deallocated += fs->fs_bsize;
@@ -509,6 +537,18 @@
* cleared, free the blocks.
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+ if (blkp == allociblk)
+ lbns_remfree = lbns;
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
+ if (bp != NULL) {
+ panic("zombie1 %jd %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp));
+ }
+ lbns_remfree++;
+#endif
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
}
@@ -541,7 +581,7 @@
int saved_inbdflush;
static struct timeval lastfail;
static int curfail;
- int reclaimed;
+ int gbflags, reclaimed;
ip = VTOI(vp);
dp = ip->i_din2;
@@ -555,6 +595,7 @@
*bpp = NULL;
if (lbn < 0)
return (EFBIG);
+ gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
if (DOINGSOFTDEP(vp))
softdep_prealloc(vp, MNT_WAIT);
@@ -605,7 +646,8 @@
panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
nb = dp->di_extb[lbn];
if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
- error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
+ error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -622,7 +664,8 @@
osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
nsize = fragroundup(fs, size);
if (nsize <= osize) {
- error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
+ error = bread_gb(vp, -1 - lbn, osize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -655,7 +698,7 @@
nsize, flags, cred, &newb);
if (error)
return (error);
- bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
+ bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, newb);
bp->b_xflags |= BX_ALTDATA;
if (flags & BA_CLRBUF)
@@ -709,7 +752,8 @@
panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
nb = dp->di_db[lbn];
if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
- error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
+ error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -725,7 +769,8 @@
osize = fragroundup(fs, blkoff(fs, ip->i_size));
nsize = fragroundup(fs, size);
if (nsize <= osize) {
- error = bread(vp, lbn, osize, NOCRED, &bp);
+ error = bread_gb(vp, lbn, osize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -755,7 +800,7 @@
&dp->di_db[0]), nsize, flags, cred, &newb);
if (error)
return (error);
- bp = getblk(vp, lbn, nsize, 0, 0, 0);
+ bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, newb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(bp);
@@ -791,7 +836,7 @@
UFS_LOCK(ump);
pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
(ufs2_daddr_t *)0);
- if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+ if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) {
curthread_pflags_restore(saved_inbdflush);
return (error);
@@ -798,9 +843,12 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn;
- bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
+ bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
+ GB_UNMAPPED);
bp->b_blkno = fsbtodb(fs, nb);
vfs_bio_clrbuf(bp);
if (DOINGSOFTDEP(vp)) {
@@ -842,6 +890,10 @@
continue;
}
UFS_LOCK(ump);
+ /*
+ * If parent indirect has just been allocated, try to cluster
+ * immediately following it.
+ */
if (pref == 0)
pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
(ufs2_daddr_t *)0);
@@ -848,7 +900,7 @@
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -864,9 +916,12 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[i].in_lbn;
- nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
+ GB_UNMAPPED);
nbp->b_blkno = fsbtodb(fs, nb);
vfs_bio_clrbuf(nbp);
if (DOINGSOFTDEP(vp)) {
@@ -911,7 +966,14 @@
*/
if (nb == 0) {
UFS_LOCK(ump);
- if (pref == 0)
+ /*
+ * If allocating metadata at the front of the cylinder
+ * group and parent indirect block has just been allocated,
+ * then cluster next to it if it is the first indirect in
+ * the file. Otherwise it has been allocated in the metadata
+ * area, so we want to find our own place out in the data area.
+ */
+ if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
&bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
@@ -918,7 +980,7 @@
flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -933,9 +995,11 @@
goto fail;
}
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = lbn;
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(nbp);
@@ -967,12 +1031,15 @@
*/
if (flags & BA_CLRBUF) {
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
- if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+ if (seqcount != 0 &&
+ (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
+ !(vm_page_count_severe() || buf_dirty_count_severe())) {
error = cluster_read(vp, ip->i_size, lbn,
(int)fs->fs_bsize, NOCRED,
- MAXBSIZE, seqcount, &nbp);
+ MAXBSIZE, seqcount, gbflags, &nbp);
} else {
- error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ error = bread_gb(vp, lbn, (int)fs->fs_bsize,
+ NOCRED, gbflags, &nbp);
}
if (error) {
brelse(nbp);
@@ -979,7 +1046,7 @@
goto fail;
}
} else {
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
}
curthread_pflags_restore(saved_inbdflush);
@@ -1001,7 +1068,7 @@
* We have to sync it at the end so that the soft updates code
* does not find any untracked changes. Although this is really
* slow, running out of disk space is not expected to be a common
- * occurence. The error return from fsync is ignored as we already
+ * occurrence. The error return from fsync is ignored as we already
* have an error to return to the user.
*
* XXX Still have to journal the free below
@@ -1013,10 +1080,16 @@
* We shall not leave the freed blocks on the vnode
* buffer object lists.
*/
- bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
if (bp != NULL) {
- bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
+ KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+ ("mismatch2 l %jd %jd b %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
+ (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp)));
+ bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~(B_ASYNC | B_CACHE);
brelse(bp);
}
deallocated += fs->fs_bsize;
@@ -1059,6 +1132,18 @@
* cleared, free the blocks.
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+ if (blkp == allociblk)
+ lbns_remfree = lbns;
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
+ if (bp != NULL) {
+ panic("zombie2 %jd %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp));
+ }
+ lbns_remfree++;
+#endif
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
}
Modified: trunk/sys/ufs/ffs/ffs_extern.h
===================================================================
--- trunk/sys/ufs/ffs/ffs_extern.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_extern.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -27,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/ffs_extern.h 306175 2016-09-22 10:42:40Z kib $
*/
#ifndef _UFS_FFS_EXTERN_H
@@ -95,7 +96,7 @@
void process_deferred_inactive(struct mount *mp);
void ffs_sync_snap(struct mount *, int);
int ffs_syncvnode(struct vnode *vp, int waitfor, int flags);
-int ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
+int ffs_truncate(struct vnode *, off_t, int, struct ucred *);
int ffs_update(struct vnode *, int);
int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **);
@@ -107,6 +108,9 @@
#define FFSV_FORCEINSMQ 0x0001
+#define FFSR_FORCE 0x0001
+#define FFSR_UNSUSPEND 0x0002
+
extern struct vop_vector ffs_vnodeops1;
extern struct vop_vector ffs_fifoops1;
extern struct vop_vector ffs_vnodeops2;
@@ -152,9 +156,7 @@
void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vnode *);
int softdep_sync_buf(struct vnode *, struct buf *, int);
-int softdep_process_worklist(struct mount *, int);
int softdep_fsync(struct vnode *);
-int softdep_waitidle(struct mount *);
int softdep_prealloc(struct vnode *, int);
int softdep_journal_lookup(struct mount *, struct vnode **);
void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
@@ -167,10 +169,10 @@
/*
* Things to request flushing in softdep_request_cleanup()
*/
-#define FLUSH_INODES 1
-#define FLUSH_INODES_WAIT 2
-#define FLUSH_BLOCKS 3
-#define FLUSH_BLOCKS_WAIT 4
+#define FLUSH_INODES 1
+#define FLUSH_INODES_WAIT 2
+#define FLUSH_BLOCKS 3
+#define FLUSH_BLOCKS_WAIT 4
/*
* Flag to ffs_syncvnode() to request flushing of data only,
* but skip the ffs_update() on the inode itself. Used to avoid
Modified: trunk/sys/ufs/ffs/ffs_inode.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_inode.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_inode.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_inode.c 300600 2016-05-24 10:41:34Z kib $");
#include "opt_quota.h"
@@ -43,6 +44,7 @@
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
+#include <sys/rwlock.h>
#include <sys/vmmeter.h>
#include <sys/stat.h>
@@ -170,12 +172,11 @@
* disk blocks.
*/
int
-ffs_truncate(vp, length, flags, cred, td)
+ffs_truncate(vp, length, flags, cred)
struct vnode *vp;
off_t length;
int flags;
struct ucred *cred;
- struct thread *td;
{
struct inode *ip;
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
@@ -449,7 +450,7 @@
ip->i_size = osize;
DIP_SET(ip, i_size, osize);
- error = vtruncbuf(vp, cred, td, length, fs->fs_bsize);
+ error = vtruncbuf(vp, cred, length, fs->fs_bsize);
if (error && (allerror == 0))
allerror = error;
@@ -562,7 +563,7 @@
softdep_journal_freeblocks(ip, cred, length, IO_EXT);
else
softdep_setup_freeblocks(ip, length, IO_EXT);
- return (ffs_update(vp, !DOINGASYNC(vp)));
+ return (ffs_update(vp, (flags & IO_SYNC) != 0 || !DOINGASYNC(vp)));
}
/*
Modified: trunk/sys/ufs/ffs/ffs_rawread.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_rawread.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_rawread.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2000-2003 Tor Egge
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_rawread.c 318267 2017-05-14 12:00:00Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -42,6 +43,7 @@
#include <sys/ttycom.h>
#include <sys/bio.h>
#include <sys/buf.h>
+#include <sys/rwlock.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@@ -70,8 +72,6 @@
int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
-void ffs_rawread_setup(void);
-
SYSCTL_DECL(_vfs_ffs);
static int ffsrawbufcnt = 4;
@@ -86,14 +86,14 @@
SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
"Flag to enable readahead for long raw reads");
+static void
+ffs_rawread_setup(void *arg __unused)
+{
-void
-ffs_rawread_setup(void)
-{
ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
}
+SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL);
-
static int
ffs_rawread_sync(struct vnode *vp)
{
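ffs_rawread_setup() above stops being a public symbol (its extern prototype is removed) and becomes a static function registered with SYSINIT, so it runs automatically during boot. The general registration shape, with illustrative names:

    static void
    example_setup(void *arg __unused)
    {
            /* one-time boot tuning, e.g. sizing a pool from nswbuf */
    }
    SYSINIT(example_setup_init, SI_SUB_VM_CONF, SI_ORDER_ANY,
        example_setup, NULL);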
@@ -143,9 +143,9 @@
if ((obj = vp->v_object) != NULL &&
(obj->flags & OBJ_MIGHTBEDIRTY) != 0) {
VI_UNLOCK(vp);
- VM_OBJECT_LOCK(obj);
+ VM_OBJECT_WLOCK(obj);
vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
- VM_OBJECT_UNLOCK(obj);
+ VM_OBJECT_WUNLOCK(obj);
} else
VI_UNLOCK(vp);
@@ -240,7 +240,7 @@
bp->b_bcount = bsize - blockoff * DEV_BSIZE;
bp->b_bufsize = bp->b_bcount;
- if (vmapbuf(bp) < 0)
+ if (vmapbuf(bp, 1) < 0)
return EFAULT;
maybe_yield();
@@ -259,7 +259,7 @@
bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
bp->b_bufsize = bp->b_bcount;
- if (vmapbuf(bp) < 0)
+ if (vmapbuf(bp, 1) < 0)
return EFAULT;
BO_STRATEGY(&dp->v_bufobj, bp);
@@ -275,7 +275,6 @@
struct buf *bp, *nbp, *tbp;
caddr_t sa, nsa, tsa;
u_int iolen;
- int spl;
caddr_t udata;
long resid;
off_t offset;
@@ -340,10 +339,7 @@
}
}
- spl = splbio();
bwait(bp, PRIBIO, "rawrd");
- splx(spl);
-
vunmapbuf(bp);
iolen = bp->b_bcount - bp->b_resid;
@@ -416,9 +412,7 @@
relpbuf(bp, &ffsrawbufcnt);
}
if (nbp != NULL) { /* Run down readahead buffer */
- spl = splbio();
bwait(nbp, PRIBIO, "rawrd");
- splx(spl);
vunmapbuf(nbp);
pbrelvp(nbp);
relpbuf(nbp, &ffsrawbufcnt);
Modified: trunk/sys/ufs/ffs/ffs_snapshot.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_snapshot.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_snapshot.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_snapshot.c 322132 2017-08-07 02:29:09Z mckusick $");
#include "opt_quota.h"
@@ -53,6 +54,7 @@
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
+#include <sys/rwlock.h>
#include <sys/vnode.h>
#include <geom/geom.h>
@@ -255,7 +257,8 @@
* Create the snapshot file.
*/
restart:
- NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, snapfile, td);
+ NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE, UIO_SYSSPACE,
+ snapfile, td);
if ((error = namei(&nd)) != 0)
return (error);
if (nd.ni_vp != NULL) {
@@ -422,7 +425,7 @@
*/
for (;;) {
vn_finished_write(wrtmp);
- if ((error = vfs_write_suspend(vp->v_mount)) != 0) {
+ if ((error = vfs_write_suspend(vp->v_mount, 0)) != 0) {
vn_start_write(NULL, &wrtmp, V_WAIT);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
goto out;
@@ -674,7 +677,8 @@
VI_LOCK(devvp);
fs->fs_snapinum[snaploc] = ip->i_number;
if (ip->i_nextsnap.tqe_prev != 0)
- panic("ffs_snapshot: %d already on list", ip->i_number);
+ panic("ffs_snapshot: %ju already on list",
+ (uintmax_t)ip->i_number);
TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
devvp->v_vflag |= VV_COPYONWRITE;
VI_UNLOCK(devvp);
@@ -686,7 +690,7 @@
/*
* Resume operation on filesystem.
*/
- vfs_write_resume_flags(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR);
+ vfs_write_resume(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR);
if (collectsnapstats && starttime.tv_sec > 0) {
nanotime(&endtime);
timespecsub(&endtime, &starttime);
@@ -790,7 +794,7 @@
brelse(nbp);
} else {
loc = blkoff(fs, fs->fs_sblockloc);
- bcopy((char *)copy_fs, &nbp->b_data[loc], fs->fs_bsize);
+ bcopy((char *)copy_fs, &nbp->b_data[loc], (u_int)fs->fs_sbsize);
bawrite(nbp);
}
/*
@@ -849,7 +853,7 @@
mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
MNT_IUNLOCK(mp);
if (error)
- (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, td);
+ (void) ffs_truncate(vp, (off_t)0, 0, NOCRED);
(void) ffs_syncvnode(vp, MNT_WAIT, 0);
if (error)
vput(vp);
@@ -1400,7 +1404,7 @@
*/
bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
bp->b_blkno = fsbtodb(fs, blkno);
- if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
+ if ((bp->b_flags & B_CACHE) == 0 &&
(error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
brelse(bp);
return (error);
@@ -1570,8 +1574,8 @@
if (xp != NULL)
vrele(ITOV(ip));
else if (snapdebug)
- printf("ffs_snapgone: lost snapshot vnode %d\n",
- ip->i_number);
+ printf("ffs_snapgone: lost snapshot vnode %ju\n",
+ (uintmax_t)ip->i_number);
/*
* Delete snapshot inode from superblock. Keep list dense.
*/
@@ -1604,7 +1608,7 @@
struct buf *ibp;
struct fs *fs;
ufs2_daddr_t numblks, blkno, dblk;
- int error, loc, last;
+ int error, i, last, loc;
struct snapdata *sn;
ip = VTOI(vp);
@@ -1624,10 +1628,14 @@
ip->i_nextsnap.tqe_prev = 0;
VI_UNLOCK(devvp);
lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
+ for (i = 0; i < sn->sn_lock.lk_recurse; i++)
+ lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
KASSERT(vp->v_vnlock == &sn->sn_lock,
("ffs_snapremove: lost lock mutation"));
vp->v_vnlock = &vp->v_lock;
VI_LOCK(devvp);
+ while (sn->sn_lock.lk_recurse > 0)
+ lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
try_free_snapdata(devvp);
} else
@@ -1741,7 +1749,7 @@
enum vtype vtype;
struct workhead *wkhd;
{
- struct buf *ibp, *cbp, *savedcbp = 0;
+ struct buf *ibp, *cbp, *savedcbp = NULL;
struct thread *td = curthread;
struct inode *ip;
struct vnode *vp = NULL;
@@ -1833,9 +1841,10 @@
if (size == fs->fs_bsize) {
#ifdef DEBUG
if (snapdebug)
- printf("%s %d lbn %jd from inum %d\n",
- "Grabonremove: snapino", ip->i_number,
- (intmax_t)lbn, inum);
+ printf("%s %ju lbn %jd from inum %ju\n",
+ "Grabonremove: snapino",
+ (uintmax_t)ip->i_number,
+ (intmax_t)lbn, (uintmax_t)inum);
#endif
/*
* If journaling is tracking this write we must add
@@ -1877,9 +1886,9 @@
break;
#ifdef DEBUG
if (snapdebug)
- printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n",
- "Copyonremove: snapino ", ip->i_number,
- (intmax_t)lbn, "for inum", inum, size,
+ printf("%s%ju lbn %jd %s %ju size %ld to blkno %jd\n",
+ "Copyonremove: snapino ", (uintmax_t)ip->i_number,
+ (intmax_t)lbn, "for inum", (uintmax_t)inum, size,
(intmax_t)cbp->b_blkno);
#endif
/*
@@ -1892,7 +1901,7 @@
* dopersistence sysctl-setable flag to decide on the
* persistence needed for file content data.
*/
- if (savedcbp != 0) {
+ if (savedcbp != NULL) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
if ((vtype == VDIR || dopersistence) &&
@@ -1936,7 +1945,7 @@
*/
if (error != 0 && wkhd != NULL)
softdep_freework(wkhd);
- lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
+ lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
return (error);
}
@@ -1988,7 +1997,7 @@
reason = "non-snapshot";
} else {
reason = "old format snapshot";
- (void)ffs_truncate(vp, (off_t)0, 0, NOCRED, td);
+ (void)ffs_truncate(vp, (off_t)0, 0, NOCRED);
(void)ffs_syncvnode(vp, MNT_WAIT, 0);
}
printf("ffs_snapshot_mount: %s inode %d\n",
@@ -2020,8 +2029,8 @@
*/
VI_LOCK(devvp);
if (ip->i_nextsnap.tqe_prev != 0)
- panic("ffs_snapshot_mount: %d already on list",
- ip->i_number);
+ panic("ffs_snapshot_mount: %ju already on list",
+ (uintmax_t)ip->i_number);
else
TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
vp->v_vflag |= VV_SYSTEM;
@@ -2202,10 +2211,8 @@
if (bp_bdskip) {
VI_LOCK(devvp);
if (!ffs_bp_snapblk(vp, nbp)) {
- if (BO_MTX(bo) != VI_MTX(vp)) {
- VI_UNLOCK(devvp);
- BO_LOCK(bo);
- }
+ VI_UNLOCK(devvp);
+ BO_LOCK(bo);
BUF_UNLOCK(nbp);
continue;
}
@@ -2235,11 +2242,11 @@
struct buf *bp;
{
struct snapdata *sn;
- struct buf *ibp, *cbp, *savedcbp = 0;
+ struct buf *ibp, *cbp, *savedcbp = NULL;
struct thread *td = curthread;
struct fs *fs;
struct inode *ip;
- struct vnode *vp = 0;
+ struct vnode *vp = NULL;
ufs2_daddr_t lbn, blkno, *snapblklist;
int lower, upper, mid, indiroff, error = 0;
int launched_async_io, prev_norunningbuf;
@@ -2365,12 +2372,13 @@
break;
#ifdef DEBUG
if (snapdebug) {
- printf("Copyonwrite: snapino %d lbn %jd for ",
- ip->i_number, (intmax_t)lbn);
+ printf("Copyonwrite: snapino %ju lbn %jd for ",
+ (uintmax_t)ip->i_number, (intmax_t)lbn);
if (bp->b_vp == devvp)
printf("fs metadata");
else
- printf("inum %d", VTOI(bp->b_vp)->i_number);
+ printf("inum %ju",
+ (uintmax_t)VTOI(bp->b_vp)->i_number);
printf(" lblkno %jd to blkno %jd\n",
(intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
}
@@ -2385,7 +2393,7 @@
* dopersistence sysctl-setable flag to decide on the
* persistence needed for file content data.
*/
- if (savedcbp != 0) {
+ if (savedcbp != NULL) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
@@ -2636,33 +2644,47 @@
static struct snapdata *
ffs_snapdata_acquire(struct vnode *devvp)
{
- struct snapdata *nsn;
- struct snapdata *sn;
+ struct snapdata *nsn, *sn;
+ int error;
/*
- * Allocate a free snapdata. This is done before acquiring the
+ * Allocate a free snapdata. This is done before acquiring the
* devvp lock to avoid allocation while the devvp interlock is
* held.
*/
nsn = ffs_snapdata_alloc();
- /*
- * If there snapshots already exist on this filesystem grab a
- * reference to the shared lock. Otherwise this is the first
- * snapshot on this filesystem and we need to use our
- * pre-allocated snapdata.
- */
- VI_LOCK(devvp);
- if (devvp->v_rdev->si_snapdata == NULL) {
- devvp->v_rdev->si_snapdata = nsn;
- nsn = NULL;
+
+ for (;;) {
+ VI_LOCK(devvp);
+ sn = devvp->v_rdev->si_snapdata;
+ if (sn == NULL) {
+ /*
+ * This is the first snapshot on this
+ * filesystem and we use our pre-allocated
+ * snapdata. Publish sn with the sn_lock
+ * owned by us, to avoid the race.
+ */
+ error = lockmgr(&nsn->sn_lock, LK_EXCLUSIVE |
+ LK_NOWAIT, NULL);
+ if (error != 0)
+ panic("leaked sn, lockmgr error %d", error);
+ sn = devvp->v_rdev->si_snapdata = nsn;
+ VI_UNLOCK(devvp);
+ nsn = NULL;
+ break;
+ }
+
+ /*
+ * A snapshot already exists on this filesystem;
+ * grab a reference to the common lock.
+ */
+ error = lockmgr(&sn->sn_lock, LK_INTERLOCK |
+ LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp));
+ if (error == 0)
+ break;
}
- sn = devvp->v_rdev->si_snapdata;
+
/*
- * Acquire the snapshot lock.
- */
- lockmgr(&sn->sn_lock,
- LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, VI_MTX(devvp));
- /*
* Free any unused snapdata.
*/
if (nsn != NULL)
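The rewritten ffs_snapdata_acquire() above replaces a single LK_RETRY acquisition with a publish-or-retry loop: either publish the preallocated snapdata with its lock already held (closing the race against other acquirers), or take the existing lock with LK_SLEEPFAIL so that any failed sleep restarts the loop and re-reads si_snapdata. Condensed shape of the pattern, with error handling elided:

    for (;;) {
            VI_LOCK(devvp);
            sn = devvp->v_rdev->si_snapdata;
            if (sn == NULL) {
                    /* publish nsn with sn_lock already owned */
                    lockmgr(&nsn->sn_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL);
                    sn = devvp->v_rdev->si_snapdata = nsn;
                    VI_UNLOCK(devvp);
                    break;
            }
            if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE |
                LK_SLEEPFAIL, VI_MTX(devvp)) == 0)
                    break;  /* lock held and sn still current */
    }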
Modified: trunk/sys/ufs/ffs/ffs_softdep.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_softdep.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_softdep.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 1998, 2000 Marshall Kirk McKusick.
* Copyright 2009, 2010 Jeffrey W. Roberson <jeff at FreeBSD.org>
@@ -40,7 +41,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_softdep.c 324612 2017-10-13 22:40:57Z jhb $");
#include "opt_ffs.h"
#include "opt_quota.h"
@@ -69,6 +70,7 @@
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -118,7 +120,7 @@
return (0);
}
-void
+void
softdep_initialize()
{
@@ -137,6 +139,7 @@
struct mount *mp;
{
+ panic("softdep_unmount called");
}
void
@@ -145,6 +148,8 @@
struct fs *fs;
struct buf *bp;
{
+
+ panic("softdep_setup_sbupdate called");
}
void
@@ -170,7 +175,7 @@
panic("softdep_setup_blkmapdep called");
}
-void
+void
softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
struct inode *ip;
ufs_lbn_t lbn;
@@ -184,7 +189,7 @@
panic("softdep_setup_allocdirect called");
}
-void
+void
softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
struct inode *ip;
ufs_lbn_t lbn;
@@ -263,7 +268,7 @@
panic("softdep_freefile called");
}
-int
+int
softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
struct buf *bp;
struct inode *dp;
@@ -276,7 +281,7 @@
panic("softdep_setup_directory_add called");
}
-void
+void
softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize)
struct buf *bp;
struct inode *dp;
@@ -289,7 +294,7 @@
panic("softdep_change_directoryentry_offset called");
}
-void
+void
softdep_setup_remove(bp, dp, ip, isrmdir)
struct buf *bp;
struct inode *dp;
@@ -300,7 +305,7 @@
panic("softdep_setup_remove called");
}
-void
+void
softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
struct buf *bp;
struct inode *dp;
@@ -432,8 +437,6 @@
{
panic("%s called", __FUNCTION__);
-
- return (0);
}
int
@@ -461,7 +464,7 @@
panic("softdep_load_inodeblock called");
}
-void
+void
softdep_update_inodeblock(ip, bp, waitfor)
struct inode *ip;
struct buf *bp;
@@ -502,7 +505,7 @@
softdep_sync_metadata(struct vnode *vp)
{
- return (0);
+ panic("softdep_sync_metadata called");
}
int
@@ -509,7 +512,7 @@
softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor)
{
- return (0);
+ panic("softdep_sync_buf called");
}
int
@@ -520,14 +523,6 @@
panic("softdep_slowdown called");
}
-void
-softdep_releasefile(ip)
- struct inode *ip; /* inode with the zero effective link count */
-{
-
- panic("softdep_releasefile called");
-}
-
int
softdep_request_cleanup(fs, vp, cred, resource)
struct fs *fs;
@@ -542,8 +537,8 @@
int
softdep_check_suspend(struct mount *mp,
struct vnode *devvp,
- int softdep_deps,
- int softdep_accdeps,
+ int softdep_depcnt,
+ int softdep_accdepcnt,
int secondary_writes,
int secondary_accwrites)
{
@@ -550,11 +545,11 @@
struct bufobj *bo;
int error;
- (void) softdep_deps,
- (void) softdep_accdeps;
+ (void) softdep_depcnt,
+ (void) softdep_accdepcnt;
bo = &devvp->v_bufobj;
- ASSERT_BO_LOCKED(bo);
+ ASSERT_BO_WLOCKED(bo);
MNT_ILOCK(mp);
while (mp->mnt_secondary_writes != 0) {
@@ -622,57 +617,22 @@
FEATURE(softupdates, "FFS soft-updates support");
-/*
- * These definitions need to be adapted to the system to which
- * this file is being ported.
- */
-
-#define M_SOFTDEP_FLAGS (M_WAITOK)
-
-#define D_PAGEDEP 0
-#define D_INODEDEP 1
-#define D_BMSAFEMAP 2
-#define D_NEWBLK 3
-#define D_ALLOCDIRECT 4
-#define D_INDIRDEP 5
-#define D_ALLOCINDIR 6
-#define D_FREEFRAG 7
-#define D_FREEBLKS 8
-#define D_FREEFILE 9
-#define D_DIRADD 10
-#define D_MKDIR 11
-#define D_DIRREM 12
-#define D_NEWDIRBLK 13
-#define D_FREEWORK 14
-#define D_FREEDEP 15
-#define D_JADDREF 16
-#define D_JREMREF 17
-#define D_JMVREF 18
-#define D_JNEWBLK 19
-#define D_JFREEBLK 20
-#define D_JFREEFRAG 21
-#define D_JSEG 22
-#define D_JSEGDEP 23
-#define D_SBDEP 24
-#define D_JTRUNC 25
-#define D_JFSYNC 26
-#define D_SENTINAL 27
-#define D_LAST D_SENTINAL
-
-unsigned long dep_current[D_LAST + 1];
-unsigned long dep_total[D_LAST + 1];
-unsigned long dep_write[D_LAST + 1];
-
-
static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0,
"soft updates stats");
static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0,
"total dependencies allocated");
+static SYSCTL_NODE(_debug_softdep, OID_AUTO, highuse, CTLFLAG_RW, 0,
+ "high use dependencies allocated");
static SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0,
"current dependencies allocated");
static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
"current dependencies written");
+unsigned long dep_current[D_LAST + 1];
+unsigned long dep_highuse[D_LAST + 1];
+unsigned long dep_total[D_LAST + 1];
+unsigned long dep_write[D_LAST + 1];
+
#define SOFTDEP_TYPE(type, str, long) \
static MALLOC_DEFINE(M_ ## type, #str, long); \
SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \
@@ -679,6 +639,8 @@
&dep_total[D_ ## type], 0, ""); \
SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \
&dep_current[D_ ## type], 0, ""); \
+ SYSCTL_ULONG(_debug_softdep_highuse, OID_AUTO, str, CTLFLAG_RD, \
+ &dep_highuse[D_ ## type], 0, ""); \
SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, \
&dep_write[D_ ## type], 0, "");
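Taking the SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation") invocation visible in the next hunk as an example, the macro above expands (modulo whitespace) to one malloc type plus four sysctl leaves, the new one being debug.softdep.highuse.jtrunc:

    static MALLOC_DEFINE(M_JTRUNC, "jtrunc", "Journal inode truncation");
    SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_total[D_JTRUNC], 0, "");
    SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_current[D_JTRUNC], 0, "");
    SYSCTL_ULONG(_debug_softdep_highuse, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_highuse[D_JTRUNC], 0, "");
    SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_write[D_JTRUNC], 0, "");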
@@ -711,9 +673,14 @@
SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation");
SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete");
+static MALLOC_DEFINE(M_SENTINEL, "sentinel", "Worklist sentinel");
+
static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
+static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data");
+#define M_SOFTDEP_FLAGS (M_WAITOK)
+
/*
* translate from workitem type to memory type
* MUST match the defines above, such that memtype[D_XXX] == M_XXX
@@ -745,11 +712,10 @@
M_JSEGDEP,
M_SBDEP,
M_JTRUNC,
- M_JFSYNC
+ M_JFSYNC,
+ M_SENTINEL
};
-static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
-
#define DtoM(type) (memtype[type])
/*
@@ -765,49 +731,17 @@
#define DOT_OFFSET offsetof(struct dirtemplate, dot_ino)
/*
- * Forward declarations.
- */
-struct inodedep_hashhead;
-struct newblk_hashhead;
-struct pagedep_hashhead;
-struct bmsafemap_hashhead;
-
-/*
- * Private journaling structures.
- */
-struct jblocks {
- struct jseglst jb_segs; /* TAILQ of current segments. */
- struct jseg *jb_writeseg; /* Next write to complete. */
- struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */
- struct jextent *jb_extent; /* Extent array. */
- uint64_t jb_nextseq; /* Next sequence number. */
- uint64_t jb_oldestwrseq; /* Oldest written sequence number. */
- uint8_t jb_needseg; /* Need a forced segment. */
- uint8_t jb_suspended; /* Did journal suspend writes? */
- int jb_avail; /* Available extents. */
- int jb_used; /* Last used extent. */
- int jb_head; /* Allocator head. */
- int jb_off; /* Allocator extent offset. */
- int jb_blocks; /* Total disk blocks covered. */
- int jb_free; /* Total disk blocks free. */
- int jb_min; /* Minimum free space. */
- int jb_low; /* Low on space. */
- int jb_age; /* Insertion time of oldest rec. */
-};
-
-struct jextent {
- ufs2_daddr_t je_daddr; /* Disk block address. */
- int je_blocks; /* Disk block count. */
-};
-
-/*
* Internal function prototypes.
*/
+static void check_clear_deps(struct mount *);
static void softdep_error(char *, int);
+static int softdep_process_worklist(struct mount *, int);
+static int softdep_waitidle(struct mount *, int);
static void drain_output(struct vnode *);
-static struct buf *getdirtybuf(struct buf *, struct mtx *, int);
-static void clear_remove(struct thread *);
-static void clear_inodedeps(struct thread *);
+static struct buf *getdirtybuf(struct buf *, struct rwlock *, int);
+static int check_inodedep_free(struct inodedep *);
+static void clear_remove(struct mount *);
+static void clear_inodedeps(struct mount *);
static void unlinked_inodedep(struct mount *, struct inodedep *);
static void clear_unlinked_inodedep(struct inodedep *);
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
@@ -818,16 +752,16 @@
static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
static int flush_deplist(struct allocdirectlst *, int, int *);
static int sync_cgs(struct mount *, int);
-static int handle_written_filepage(struct pagedep *, struct buf *);
+static int handle_written_filepage(struct pagedep *, struct buf *, int);
static int handle_written_sbdep(struct sbdep *, struct buf *);
static void initiate_write_sbdep(struct sbdep *);
-static void diradd_inode_written(struct diradd *, struct inodedep *);
+static void diradd_inode_written(struct diradd *, struct inodedep *);
static int handle_written_indirdep(struct indirdep *, struct buf *,
- struct buf**);
-static int handle_written_inodeblock(struct inodedep *, struct buf *);
+ struct buf**, int);
+static int handle_written_inodeblock(struct inodedep *, struct buf *, int);
static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *,
uint8_t *);
-static int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
+static int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int);
static void handle_written_jaddref(struct jaddref *);
static void handle_written_jremref(struct jremref *);
static void handle_written_jseg(struct jseg *, struct buf *);
@@ -918,7 +852,7 @@
static inline struct freeblks *newfreeblks(struct mount *, struct inode *);
static void freeblks_free(struct ufsmount *, struct freeblks *, int);
static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t);
-ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
+static ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int);
static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t,
int, int);
@@ -951,22 +885,24 @@
struct allocdirect *, struct allocdirect *);
static struct freefrag *allocindir_merge(struct allocindir *,
struct allocindir *);
-static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int,
+static int bmsafemap_find(struct bmsafemap_hashhead *, int,
struct bmsafemap **);
static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *,
int cg, struct bmsafemap *);
-static int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t,
- int, struct newblk **);
+static int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int,
+ struct newblk **);
static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **);
-static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
+static int inodedep_find(struct inodedep_hashhead *, ino_t,
struct inodedep **);
static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
int, struct pagedep **);
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
- struct mount *mp, int, struct pagedep **);
+ struct pagedep **);
static void pause_timer(void *);
static int request_cleanup(struct mount *, int);
+static void schedule_cleanup(struct mount *);
+static void softdep_ast_cleanup_proc(void);
static int process_worklist_item(struct mount *, int, int);
static void process_removes(struct vnode *);
static void process_truncates(struct vnode *);
@@ -976,12 +912,12 @@
static void wake_worklist(struct worklist *);
static void wait_worklist(struct worklist *, char *);
static void remove_from_worklist(struct worklist *);
-static void softdep_flush(void);
+static void softdep_flush(void *);
static void softdep_flushjournal(struct mount *);
-static int softdep_speedup(void);
-static void worklist_speedup(void);
+static int softdep_speedup(struct ufsmount *);
+static void worklist_speedup(struct mount *);
static int journal_mount(struct mount *, struct fs *, struct ucred *);
-static void journal_unmount(struct mount *);
+static void journal_unmount(struct ufsmount *);
static int journal_space(struct ufsmount *, int);
static void journal_suspend(struct ufsmount *);
static int journal_unsuspend(struct ufsmount *ump);
@@ -988,6 +924,7 @@
static void softdep_prelink(struct vnode *, struct vnode *);
static void add_to_journal(struct worklist *);
static void remove_from_journal(struct worklist *);
+static bool softdep_excess_items(struct ufsmount *, int);
static void softdep_process_journal(struct mount *, struct worklist *, int);
static struct jremref *newjremref(struct dirrem *, struct inode *,
struct inode *ip, off_t, nlink_t);
@@ -999,6 +936,7 @@
static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
ufs2_daddr_t, int);
+static void adjust_newfreework(struct freeblks *, int);
static struct jtrunc *newjtrunc(struct freeblks *, off_t, int);
static void move_newblock_dep(struct jaddref *, struct inodedep *);
static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t);
@@ -1027,16 +965,29 @@
static void softdep_deallocate_dependencies(struct buf *);
static int softdep_count_dependencies(struct buf *bp, int);
+/*
+ * Global lock over all of soft updates.
+ */
static struct mtx lk;
-MTX_SYSINIT(softdep_lock, &lk, "Softdep Lock", MTX_DEF);
+MTX_SYSINIT(softdep_lock, &lk, "Global Softdep Lock", MTX_DEF);
-#define TRY_ACQUIRE_LOCK(lk) mtx_trylock(lk)
-#define ACQUIRE_LOCK(lk) mtx_lock(lk)
-#define FREE_LOCK(lk) mtx_unlock(lk)
+#define ACQUIRE_GBLLOCK(lk) mtx_lock(lk)
+#define FREE_GBLLOCK(lk) mtx_unlock(lk)
+#define GBLLOCK_OWNED(lk) mtx_assert((lk), MA_OWNED)
-#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock)
-#define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock)
+/*
+ * Per-filesystem soft-updates locking.
+ */
+#define LOCK_PTR(ump) (&(ump)->um_softdep->sd_fslock)
+#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock(&(ump)->um_softdep->sd_fslock)
+#define ACQUIRE_LOCK(ump) rw_wlock(&(ump)->um_softdep->sd_fslock)
+#define FREE_LOCK(ump) rw_wunlock(&(ump)->um_softdep->sd_fslock)
+#define LOCK_OWNED(ump) rw_assert(&(ump)->um_softdep->sd_fslock, \
+ RA_WLOCKED)
+#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock)
+#define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock)
+
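This split replaces the old single softdep mutex: lk now covers only state shared across filesystems (the dep_* statistics arrays and the softdepmounts list), while each mount's dependency structures are protected by its own rwlock. A minimal sketch of the resulting usage, mirroring workitem_alloc() further down in this patch (a sketch, not committed code):

    ACQUIRE_GBLLOCK(&lk);        /* global: dep_current[], dep_total[] */
    dep_total[type]++;
    FREE_GBLLOCK(&lk);
    ACQUIRE_LOCK(ump);           /* per-fs: worklists, hash tables */
    ump->softdep_deps++;
    FREE_LOCK(ump);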
/*
* Worklist queue management.
* These routines require that the lock be held.
@@ -1070,7 +1021,7 @@
{
if (locked)
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(item->wk_mp));
if (item->wk_state & ONWORKLIST)
panic("worklist_insert: %p %s(0x%X) already on list",
item, TYPENAME(item->wk_type), item->wk_state);
@@ -1085,7 +1036,7 @@
{
if (locked)
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(item->wk_mp));
if ((item->wk_state & ONWORKLIST) == 0)
panic("worklist_remove: %p %s(0x%X) not on list",
item, TYPENAME(item->wk_type), item->wk_state);
@@ -1158,7 +1109,6 @@
freedep = freedep_merge(WK_FREEDEP(wk), freedep);
}
- mtx_assert(&lk, MA_OWNED);
while ((wk = LIST_FIRST(src)) != NULL) {
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(dst, wk);
@@ -1200,8 +1150,12 @@
*/
static void workitem_free(struct worklist *, int);
static void workitem_alloc(struct worklist *, int, struct mount *);
+static void workitem_reassign(struct worklist *, int);
-#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)(item), (type))
+#define WORKITEM_FREE(item, type) \
+ workitem_free((struct worklist *)(item), (type))
+#define WORKITEM_REASSIGN(item, type) \
+ workitem_reassign((struct worklist *)(item), (type))
static void
workitem_free(item, type)
@@ -1209,13 +1163,12 @@
int type;
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
#ifdef DEBUG
if (item->wk_state & ONWORKLIST)
panic("workitem_free: %s(0x%X) still on list",
TYPENAME(item->wk_type), item->wk_state);
- if (item->wk_type != type)
+ if (item->wk_type != type && type != D_NEWBLK)
panic("workitem_free: type mismatch %s != %s",
TYPENAME(item->wk_type), TYPENAME(type));
#endif
@@ -1222,9 +1175,20 @@
if (item->wk_state & IOWAITING)
wakeup(item);
ump = VFSTOUFS(item->wk_mp);
+ LOCK_OWNED(ump);
+ KASSERT(ump->softdep_deps > 0,
+ ("workitem_free: %s: softdep_deps going negative",
+ ump->um_fs->fs_fsmnt));
if (--ump->softdep_deps == 0 && ump->softdep_req)
wakeup(&ump->softdep_deps);
- dep_current[type]--;
+ KASSERT(dep_current[item->wk_type] > 0,
+ ("workitem_free: %s: dep_current[%s] going negative",
+ ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+ ("workitem_free: %s: softdep_curdeps[%s] going negative",
+ ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ atomic_subtract_long(&dep_current[item->wk_type], 1);
+ ump->softdep_curdeps[item->wk_type] -= 1;
free(item, DtoM(type));
}
@@ -1241,24 +1205,54 @@
item->wk_state = 0;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_GBLLOCK(&lk);
dep_current[type]++;
+ if (dep_current[type] > dep_highuse[type])
+ dep_highuse[type] = dep_current[type];
dep_total[type]++;
+ FREE_GBLLOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ ump->softdep_curdeps[type] += 1;
ump->softdep_deps++;
ump->softdep_accdeps++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
+static void
+workitem_reassign(item, newtype)
+ struct worklist *item;
+ int newtype;
+{
+ struct ufsmount *ump;
+
+ ump = VFSTOUFS(item->wk_mp);
+ LOCK_OWNED(ump);
+ KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+ ("workitem_reassign: %s: softdep_curdeps[%s] going negative",
+ VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ ump->softdep_curdeps[item->wk_type] -= 1;
+ ump->softdep_curdeps[newtype] += 1;
+ KASSERT(dep_current[item->wk_type] > 0,
+ ("workitem_reassign: %s: dep_current[%s] going negative",
+ VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ ACQUIRE_GBLLOCK(&lk);
+ dep_current[newtype]++;
+ dep_current[item->wk_type]--;
+ if (dep_current[newtype] > dep_highuse[newtype])
+ dep_highuse[newtype] = dep_current[newtype];
+ dep_total[newtype]++;
+ FREE_GBLLOCK(&lk);
+ item->wk_type = newtype;
+}
+
/*
* Workitem queue management
*/
static int max_softdeps; /* maximum number of structs before slowdown */
-static int maxindirdeps = 50; /* max number of indirdeps before slowdown */
static int tickdelay = 2; /* number of ticks to pause during slowdown */
static int proc_waiting; /* tracks whether we have a timeout posted */
static int *stat_countp; /* statistic to count in proc_waiting timeout */
static struct callout softdep_callout;
-static int req_pending;
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
static int req_clear_remove; /* syncer process flush some freeblks */
static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1266,6 +1260,7 @@
/*
* runtime statistics
*/
+static int stat_flush_threads; /* number of softdep flushing threads */
static int stat_worklist_push; /* number of worklist cleanups */
static int stat_blk_limit_push; /* number of times block limit neared */
static int stat_ino_limit_push; /* number of times inode limit neared */
@@ -1290,13 +1285,14 @@
static int stat_cleanup_inorequests; /* Number of inode cleanup requests */
static int stat_cleanup_retries; /* Number of cleanups that needed to flush */
static int stat_cleanup_failures; /* Number of cleanup requests that failed */
+static int stat_emptyjblocks; /* Number of potentially empty journal blocks */
SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
&max_softdeps, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW,
&tickdelay, 0, "");
-SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW,
- &maxindirdeps, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, flush_threads, CTLFLAG_RD,
+ &stat_flush_threads, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW,
&stat_worklist_push, 0,"");
SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW,
@@ -1347,103 +1343,146 @@
&stat_cleanup_failures, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW,
&softdep_flushcache, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, emptyjblocks, CTLFLAG_RD,
+ &stat_emptyjblocks, 0, "");
SYSCTL_DECL(_vfs_ffs);
-LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl;
-static u_long bmsafemap_hash; /* size of hash table - 1 */
-
-static int compute_summary_at_mount = 0; /* Whether to recompute the summary at mount time */
+/* Whether to recompute the summary at mount time */
+static int compute_summary_at_mount = 0;
SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
&compute_summary_at_mount, 0, "Recompute summary at mount");
+static int print_threads = 0;
+SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW,
+ &print_threads, 0, "Notify flusher thread start/stop");
-static struct proc *softdepproc;
-static struct kproc_desc softdep_kp = {
- "softdepflush",
- softdep_flush,
- &softdepproc
-};
-SYSINIT(sdproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
- &softdep_kp);
+/* List of all filesystems mounted with soft updates */
+static TAILQ_HEAD(, mount_softdeps) softdepmounts;
+/*
+ * This function cleans the worklist for a filesystem.
+ * Each filesystem running with soft dependencies gets its own
+ * thread to run in this function. The thread is started up in
+ * softdep_mount and shut down in softdep_unmount. They show up

+ * as part of the kernel "bufdaemon" process whose process
+ * entry is available in bufdaemonproc.
+ */
+static int searchfailed;
+extern struct proc *bufdaemonproc;
static void
-softdep_flush(void)
+softdep_flush(addr)
+ void *addr;
{
- struct mount *nmp;
struct mount *mp;
+ struct thread *td;
struct ufsmount *ump;
- struct thread *td;
- int remaining;
- int progress;
- int vfslocked;
td = curthread;
td->td_pflags |= TDP_NORUNNINGBUF;
-
+ mp = (struct mount *)addr;
+ ump = VFSTOUFS(mp);
+ atomic_add_int(&stat_flush_threads, 1);
+ ACQUIRE_LOCK(ump);
+ ump->softdep_flags &= ~FLUSH_STARTING;
+ wakeup(&ump->softdep_flushtd);
+ FREE_LOCK(ump);
+ if (print_threads) {
+ if (stat_flush_threads == 1)
+ printf("Running %s at pid %d\n", bufdaemonproc->p_comm,
+ bufdaemonproc->p_pid);
+ printf("Start thread %s\n", td->td_name);
+ }
for (;;) {
- kproc_suspend_check(softdepproc);
- vfslocked = VFS_LOCK_GIANT((struct mount *)NULL);
- ACQUIRE_LOCK(&lk);
+ while (softdep_process_worklist(mp, 0) > 0 ||
+ (MOUNTEDSUJ(mp) &&
+ VFSTOUFS(mp)->softdep_jblocks->jb_suspended))
+ kthread_suspend_check();
+ ACQUIRE_LOCK(ump);
+ if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
+ msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM,
+ "sdflush", hz / 2);
+ ump->softdep_flags &= ~FLUSH_CLEANUP;
/*
- * If requested, try removing inode or removal dependencies.
+ * Check to see if we are done and need to exit.
*/
- if (req_clear_inodedeps) {
- clear_inodedeps(td);
- req_clear_inodedeps -= 1;
- wakeup_one(&proc_waiting);
+ if ((ump->softdep_flags & FLUSH_EXIT) == 0) {
+ FREE_LOCK(ump);
+ continue;
}
- if (req_clear_remove) {
- clear_remove(td);
- req_clear_remove -= 1;
- wakeup_one(&proc_waiting);
- }
- FREE_LOCK(&lk);
- VFS_UNLOCK_GIANT(vfslocked);
- remaining = progress = 0;
- mtx_lock(&mountlist_mtx);
- for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
- nmp = TAILQ_NEXT(mp, mnt_list);
- if (MOUNTEDSOFTDEP(mp) == 0)
- continue;
- if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
- continue;
- vfslocked = VFS_LOCK_GIANT(mp);
- progress += softdep_process_worklist(mp, 0);
- ump = VFSTOUFS(mp);
- remaining += ump->softdep_on_worklist;
- VFS_UNLOCK_GIANT(vfslocked);
- mtx_lock(&mountlist_mtx);
- nmp = TAILQ_NEXT(mp, mnt_list);
- vfs_unbusy(mp);
- }
- mtx_unlock(&mountlist_mtx);
- if (remaining && progress)
- continue;
- ACQUIRE_LOCK(&lk);
- if (!req_pending)
- msleep(&req_pending, &lk, PVM, "sdflush", hz);
- req_pending = 0;
- FREE_LOCK(&lk);
+ ump->softdep_flags &= ~FLUSH_EXIT;
+ FREE_LOCK(ump);
+ wakeup(&ump->softdep_flags);
+ if (print_threads)
+ printf("Stop thread %s: searchfailed %d, did cleanups %d\n", td->td_name, searchfailed, ump->um_softdep->sd_cleanups);
+ atomic_subtract_int(&stat_flush_threads, 1);
+ kthread_exit();
+ panic("kthread_exit failed\n");
}
}
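A condensed view of the handshake between this thread and the mount/unmount paths shown later in this patch (flag and sleep-channel names are from the code; this is a sketch, not the code itself):

    softdep_mount():   set FLUSH_STARTING; kproc_kthread_add(softdep_flush, ...);
                       msleep(&ump->softdep_flushtd) until FLUSH_STARTING clears
    softdep_flush():   clear FLUSH_STARTING; wakeup(&ump->softdep_flushtd);
                       loop: drain worklist, then msleep(&ump->softdep_flushtd)
                       until FLUSH_CLEANUP or FLUSH_EXIT is set
    softdep_unmount(): set FLUSH_EXIT; wakeup(&ump->softdep_flushtd);
                       msleep(&ump->softdep_flags) until the thread clears
                       FLUSH_EXIT and calls kthread_exit()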
static void
-worklist_speedup(void)
+worklist_speedup(mp)
+ struct mount *mp;
{
- mtx_assert(&lk, MA_OWNED);
- if (req_pending == 0) {
- req_pending = 1;
- wakeup(&req_pending);
- }
+ struct ufsmount *ump;
+
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
+ ump->softdep_flags |= FLUSH_CLEANUP;
+ wakeup(&ump->softdep_flushtd);
}
static int
-softdep_speedup(void)
+softdep_speedup(ump)
+ struct ufsmount *ump;
{
+ struct ufsmount *altump;
+ struct mount_softdeps *sdp;
- worklist_speedup();
+ LOCK_OWNED(ump);
+ worklist_speedup(ump->um_mountp);
bd_speedup();
- return speedup_syncer();
+ /*
+ * If we have global shortages, then we need other
+ * filesystems to help with the cleanup. Here we wakeup a
+ * flusher thread for a filesystem that is over its fair
+ * share of resources.
+ */
+ if (req_clear_inodedeps || req_clear_remove) {
+ ACQUIRE_GBLLOCK(&lk);
+ TAILQ_FOREACH(sdp, &softdepmounts, sd_next) {
+ if ((altump = sdp->sd_ump) == ump)
+ continue;
+ if (((req_clear_inodedeps &&
+ altump->softdep_curdeps[D_INODEDEP] >
+ max_softdeps / stat_flush_threads) ||
+ (req_clear_remove &&
+ altump->softdep_curdeps[D_DIRREM] >
+ (max_softdeps / 2) / stat_flush_threads)) &&
+ TRY_ACQUIRE_LOCK(altump))
+ break;
+ }
+ if (sdp == NULL) {
+ searchfailed++;
+ FREE_GBLLOCK(&lk);
+ } else {
+ /*
+ * Move to the end of the list so we pick a
+ * different one on our next try.
+ */
+ TAILQ_REMOVE(&softdepmounts, sdp, sd_next);
+ TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
+ FREE_GBLLOCK(&lk);
+ if ((altump->softdep_flags &
+ (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
+ altump->softdep_flags |= FLUSH_CLEANUP;
+ altump->um_softdep->sd_cleanups++;
+ wakeup(&altump->softdep_flushtd);
+ FREE_LOCK(altump);
+ }
+ }
+ return (speedup_syncer());
}
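To make the fair-share test concrete, a worked example with assumed numbers: if max_softdeps is 1,000,000 and stat_flush_threads is 4, another filesystem's flusher is woken for an inodedep shortage only once that filesystem holds more than 1,000,000 / 4 = 250,000 inodedeps, and for a removal shortage only once it holds more than (1,000,000 / 2) / 4 = 125,000 dirrems.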
/*
@@ -1464,8 +1503,8 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
+ LOCK_OWNED(ump);
if (wk->wk_state & ONWORKLIST)
panic("add_to_worklist: %s(0x%X) already on list",
TYPENAME(wk->wk_type), wk->wk_state);
@@ -1481,7 +1520,7 @@
}
ump->softdep_on_worklist += 1;
if (flags & WK_NODELAY)
- worklist_speedup();
+ worklist_speedup(wk->wk_mp);
}
/*
@@ -1517,9 +1556,11 @@
struct worklist *wk;
char *wmesg;
{
+ struct ufsmount *ump;
+ ump = VFSTOUFS(wk->wk_mp);
wk->wk_state |= IOWAITING;
- msleep(wk, &lk, PVM, wmesg, 0);
+ msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0);
}
/*
@@ -1531,58 +1572,44 @@
* ordering ensures that no new <vfsid, inum, lbn> triples will be generated
* until all the old ones have been purged from the dependency lists.
*/
-int
+static int
softdep_process_worklist(mp, full)
struct mount *mp;
int full;
{
- struct thread *td = curthread;
int cnt, matchcnt;
struct ufsmount *ump;
long starttime;
KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp"));
- /*
- * Record the process identifier of our caller so that we can give
- * this process preferential treatment in request_cleanup below.
- */
+ if (MOUNTEDSOFTDEP(mp) == 0)
+ return (0);
matchcnt = 0;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
starttime = time_second;
- softdep_process_journal(mp, NULL, full?MNT_WAIT:0);
+ softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0);
+ check_clear_deps(mp);
while (ump->softdep_on_worklist > 0) {
if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0)
break;
else
matchcnt += cnt;
+ check_clear_deps(mp);
/*
- * If requested, try removing inode or removal dependencies.
- */
- if (req_clear_inodedeps) {
- clear_inodedeps(td);
- req_clear_inodedeps -= 1;
- wakeup_one(&proc_waiting);
- }
- if (req_clear_remove) {
- clear_remove(td);
- req_clear_remove -= 1;
- wakeup_one(&proc_waiting);
- }
- /*
* We do not generally want to stop for buffer space, but if
* we are really being a buffer hog, we will stop and wait.
*/
if (should_yield()) {
- FREE_LOCK(&lk);
- kern_yield(PRI_UNCHANGED);
+ FREE_LOCK(ump);
+ kern_yield(PRI_USER);
bwillwrite();
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
* Never allow processing to run for more than one
- * second. Otherwise the other mountpoints may get
- * excessively backlogged.
+ * second. This gives the syncer thread the opportunity
+ * to pause if appropriate.
*/
if (!full && starttime != time_second)
break;
@@ -1589,7 +1616,7 @@
}
if (full == 0)
journal_unsuspend(ump);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (matchcnt);
}
@@ -1604,12 +1631,13 @@
{
struct inodedep *inodedep;
struct dirrem *dirrem;
+ struct ufsmount *ump;
struct mount *mp;
ino_t inum;
- mtx_assert(&lk, MA_OWNED);
-
mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
inum = VTOI(vp)->i_number;
for (;;) {
top:
@@ -1632,12 +1660,12 @@
if (dirrem == NULL)
return;
remove_from_worklist(&dirrem->dm_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
panic("process_removes: suspended filesystem");
handle_workitem_remove(dirrem, 0);
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
}
@@ -1653,13 +1681,14 @@
{
struct inodedep *inodedep;
struct freeblks *freeblks;
+ struct ufsmount *ump;
struct mount *mp;
ino_t inum;
int cgwait;
- mtx_assert(&lk, MA_OWNED);
-
mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
inum = VTOI(vp)->i_number;
for (;;) {
if (inodedep_lookup(mp, inum, 0, &inodedep) == 0)
@@ -1680,9 +1709,9 @@
}
/* Freeblks is waiting on an inode write. */
if ((freeblks->fb_state & COMPLETE) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
ffs_update(vp, 1);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) ==
@@ -1689,7 +1718,7 @@
(ALLCOMPLETE | ONWORKLIST)) {
remove_from_worklist(&freeblks->fb_list);
freeblks->fb_state |= INPROGRESS;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp,
V_NOWAIT))
panic("process_truncates: "
@@ -1696,7 +1725,7 @@
"suspended filesystem");
handle_workitem_freeblocks(freeblks, 0);
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
if (freeblks->fb_cgwait)
@@ -1703,10 +1732,10 @@
cgwait++;
}
if (cgwait) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
sync_cgs(mp, MNT_WAIT);
ffs_sync_snap(mp, MNT_WAIT);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
}
if (freeblks == NULL)
@@ -1724,13 +1753,12 @@
int target;
int flags;
{
- struct worklist sintenel;
+ struct worklist sentinel;
struct worklist *wk;
struct ufsmount *ump;
int matchcnt;
int error;
- mtx_assert(&lk, MA_OWNED);
KASSERT(mp != NULL, ("process_worklist_item: NULL mp"));
/*
* If we are being called because of a process doing a
@@ -1741,15 +1769,16 @@
return (-1);
PHOLD(curproc); /* Don't let the stack go away. */
ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
matchcnt = 0;
- sintenel.wk_mp = NULL;
- sintenel.wk_type = D_SENTINAL;
- LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sintenel, wk_list);
- for (wk = LIST_NEXT(&sintenel, wk_list); wk != NULL;
- wk = LIST_NEXT(&sintenel, wk_list)) {
- if (wk->wk_type == D_SENTINAL) {
- LIST_REMOVE(&sintenel, wk_list);
- LIST_INSERT_AFTER(wk, &sintenel, wk_list);
+ sentinel.wk_mp = NULL;
+ sentinel.wk_type = D_SENTINEL;
+ LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sentinel, wk_list);
+ for (wk = LIST_NEXT(&sentinel, wk_list); wk != NULL;
+ wk = LIST_NEXT(&sentinel, wk_list)) {
+ if (wk->wk_type == D_SENTINEL) {
+ LIST_REMOVE(&sentinel, wk_list);
+ LIST_INSERT_AFTER(wk, &sentinel, wk_list);
continue;
}
if (wk->wk_state & INPROGRESS)
@@ -1757,7 +1786,7 @@
wk);
wk->wk_state |= INPROGRESS;
remove_from_worklist(wk);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
panic("process_worklist_item: suspended filesystem");
switch (wk->wk_type) {
@@ -1790,7 +1819,7 @@
/* NOTREACHED */
}
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (error == 0) {
if (++matchcnt == target)
break;
@@ -1806,11 +1835,11 @@
wake_worklist(wk);
add_to_worklist(wk, WK_HEAD);
}
- LIST_REMOVE(&sintenel, wk_list);
+ LIST_REMOVE(&sentinel, wk_list);
/* Sentinel could've become the tail from remove_from_worklist. */
- if (ump->softdep_worklist_tail == &sintenel)
+ if (ump->softdep_worklist_tail == &sentinel)
ump->softdep_worklist_tail =
- (struct worklist *)sintenel.wk_list.le_prev;
+ (struct worklist *)sentinel.wk_list.le_prev;
PRELE(curproc);
return (matchcnt);
}
@@ -1824,23 +1853,29 @@
struct buf *newbp;
{
struct worklist *wk, *wktail;
+ struct ufsmount *ump;
int dirty;
+ if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL)
+ return (0);
+ KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
+ ("softdep_move_dependencies called on non-softdep filesystem"));
dirty = 0;
wktail = NULL;
- ACQUIRE_LOCK(&lk);
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
LIST_REMOVE(wk, wk_list);
if (wk->wk_type == D_BMSAFEMAP &&
bmsafemap_backgroundwrite(WK_BMSAFEMAP(wk), newbp))
dirty = 1;
- if (wktail == 0)
+ if (wktail == NULL)
LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
else
LIST_INSERT_AFTER(wktail, wk, wk_list);
wktail = wk;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (dirty);
}
@@ -1855,8 +1890,8 @@
struct thread *td;
{
struct vnode *devvp;
- int count, error = 0;
struct ufsmount *ump;
+ int count, error;
/*
* Alternately flush the block device associated with the mount
@@ -1865,6 +1900,7 @@
* are found.
*/
*countp = 0;
+ error = 0;
ump = VFSTOUFS(oldmnt);
devvp = ump->um_devvp;
while ((count = softdep_process_worklist(oldmnt, 1)) > 0) {
@@ -1872,36 +1908,47 @@
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(devvp, MNT_WAIT, td);
VOP_UNLOCK(devvp, 0);
- if (error)
+ if (error != 0)
break;
}
return (error);
}
-int
-softdep_waitidle(struct mount *mp)
+#define SU_WAITIDLE_RETRIES 20
+static int
+softdep_waitidle(struct mount *mp, int flags __unused)
{
struct ufsmount *ump;
- int error;
- int i;
+ struct vnode *devvp;
+ struct thread *td;
+ int error, i;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
- for (i = 0; i < 10 && ump->softdep_deps; i++) {
+ devvp = ump->um_devvp;
+ td = curthread;
+ error = 0;
+ ACQUIRE_LOCK(ump);
+ for (i = 0; i < SU_WAITIDLE_RETRIES && ump->softdep_deps != 0; i++) {
ump->softdep_req = 1;
- if (ump->softdep_on_worklist)
- panic("softdep_waitidle: work added after flush.");
- msleep(&ump->softdep_deps, &lk, PVM, "softdeps", 1);
+ KASSERT((flags & FORCECLOSE) == 0 ||
+ ump->softdep_on_worklist == 0,
+ ("softdep_waitidle: work added after flush"));
+ msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM | PDROP,
+ "softdeps", 10 * hz);
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+ error = VOP_FSYNC(devvp, MNT_WAIT, td);
+ VOP_UNLOCK(devvp, 0);
+ ACQUIRE_LOCK(ump);
+ if (error != 0)
+ break;
}
ump->softdep_req = 0;
- FREE_LOCK(&lk);
- error = 0;
- if (i == 10) {
+ if (i == SU_WAITIDLE_RETRIES && error == 0 && ump->softdep_deps != 0) {
error = EBUSY;
printf("softdep_waitidle: Failed to flush worklist for %p\n",
mp);
}
-
+ FREE_LOCK(ump);
return (error);
}
@@ -1921,6 +1968,8 @@
int error, early, depcount, loopcnt, retry_flush_count, retry;
int morework;
+ KASSERT(MOUNTEDSOFTDEP(oldmnt) != 0,
+ ("softdep_flushfiles called on non-softdep filesystem"));
loopcnt = 10;
retry_flush_count = 3;
retry_flush:
@@ -1956,7 +2005,7 @@
error = EBUSY;
}
if (!error)
- error = softdep_waitidle(oldmnt);
+ error = softdep_waitidle(oldmnt, flags);
if (!error) {
if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) {
retry = 0;
@@ -1991,12 +2040,14 @@
/*
* Structure hashing.
*
- * There are three types of structures that can be looked up:
+ * There are four types of structures that can be looked up:
* 1) pagedep structures identified by mount point, inode number,
* and logical block.
* 2) inodedep structures identified by mount point and inode number.
* 3) newblk structures identified by mount point and
* physical block number.
+ * 4) bmsafemap structures identified by mount point and
+ * cylinder group number.
*
* The "pagedep" and "inodedep" dependency structures are hashed
* separately from the file blocks and inodes to which they correspond.
@@ -2008,34 +2059,28 @@
* their allocdirect or allocindir structure.
*
* The lookup routines optionally create and hash a new instance when
- * an existing entry is not found.
+ * an existing entry is not found. The bmsafemap lookup routine always
+ * allocates a new structure if an existing one is not found.
*/
#define DEPALLOC 0x0001 /* allocate structure if lookup fails */
-#define NODELAY 0x0002 /* cannot do background work */
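All of the optionally-creating lookup routines below (pagedep, inodedep, newblk) share the same race-tolerant shape now that allocation happens outside the per-filesystem lock. A schematic sketch with placeholder names (xxx_find, M_XXX, D_XXX and wk_list are stand-ins, not real identifiers):

    /* caller holds the per-filesystem lock */
    if (xxx_find(hashhead, key, itempp))
        return (1);                 /* fast path: already hashed */
    if ((flags & DEPALLOC) == 0)
        return (0);                 /* caller did not ask to create */
    FREE_LOCK(ump);                 /* never malloc with the lock held */
    item = malloc(sizeof(*item), M_XXX, M_SOFTDEP_FLAGS);
    workitem_alloc(&item->wk_list, D_XXX, mp);
    ACQUIRE_LOCK(ump);
    if (xxx_find(hashhead, key, itempp)) {
        WORKITEM_FREE(item, D_XXX); /* lost the race; free ours */
        return (1);
    }
    LIST_INSERT_HEAD(hashhead, item, hash); /* publish under the lock */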
/*
* Structures and routines associated with pagedep caching.
*/
-LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
-u_long pagedep_hash; /* size of hash table - 1 */
-#define PAGEDEP_HASH(mp, inum, lbn) \
- (&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
- pagedep_hash])
+#define PAGEDEP_HASH(ump, inum, lbn) \
+ (&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size])
static int
-pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp)
+pagedep_find(pagedephd, ino, lbn, pagedeppp)
struct pagedep_hashhead *pagedephd;
ino_t ino;
ufs_lbn_t lbn;
- struct mount *mp;
- int flags;
struct pagedep **pagedeppp;
{
struct pagedep *pagedep;
LIST_FOREACH(pagedep, pagedephd, pd_hash) {
- if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn &&
- mp == pagedep->pd_list.wk_mp) {
+ if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) {
*pagedeppp = pagedep;
return (1);
}
@@ -2061,10 +2106,12 @@
struct pagedep *pagedep;
struct pagedep_hashhead *pagedephd;
struct worklist *wk;
+ struct ufsmount *ump;
int ret;
int i;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
if (bp) {
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
if (wk->wk_type == D_PAGEDEP) {
@@ -2073,8 +2120,8 @@
}
}
}
- pagedephd = PAGEDEP_HASH(mp, ino, lbn);
- ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+ pagedephd = PAGEDEP_HASH(ump, ino, lbn);
+ ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
if (ret) {
if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp)
WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list);
@@ -2082,12 +2129,12 @@
}
if ((flags & DEPALLOC) == 0)
return (0);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
pagedep = malloc(sizeof(struct pagedep),
M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO);
workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp);
- ACQUIRE_LOCK(&lk);
- ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+ ACQUIRE_LOCK(ump);
+ ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
if (*pagedeppp) {
/*
* This should never happen since we only create pagedeps
@@ -2111,15 +2158,12 @@
/*
* Structures and routines associated with inodedep caching.
*/
-LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
-static u_long inodedep_hash; /* size of hash table - 1 */
-#define INODEDEP_HASH(fs, inum) \
- (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
+#define INODEDEP_HASH(ump, inum) \
+ (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size])
static int
-inodedep_find(inodedephd, fs, inum, inodedeppp)
+inodedep_find(inodedephd, inum, inodedeppp)
struct inodedep_hashhead *inodedephd;
- struct fs *fs;
ino_t inum;
struct inodedep **inodedeppp;
{
@@ -2126,7 +2170,7 @@
struct inodedep *inodedep;
LIST_FOREACH(inodedep, inodedephd, id_hash)
- if (inum == inodedep->id_ino && fs == inodedep->id_fs)
+ if (inum == inodedep->id_ino)
break;
if (inodedep) {
*inodedeppp = inodedep;
@@ -2151,27 +2195,32 @@
{
struct inodedep *inodedep;
struct inodedep_hashhead *inodedephd;
+ struct ufsmount *ump;
struct fs *fs;
- mtx_assert(&lk, MA_OWNED);
- fs = VFSTOUFS(mp)->um_fs;
- inodedephd = INODEDEP_HASH(fs, inum);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ fs = ump->um_fs;
+ inodedephd = INODEDEP_HASH(ump, inum);
- if (inodedep_find(inodedephd, fs, inum, inodedeppp))
+ if (inodedep_find(inodedephd, inum, inodedeppp))
return (1);
if ((flags & DEPALLOC) == 0)
return (0);
/*
- * If we are over our limit, try to improve the situation.
+ * If the system is over its limit and our filesystem is
+ * responsible for more than our share of that usage and
+ * we are not in a rush, request some inodedep cleanup.
*/
- if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0)
- request_cleanup(mp, FLUSH_INODES);
- FREE_LOCK(&lk);
+ if (softdep_excess_items(ump, D_INODEDEP))
+ schedule_cleanup(mp);
+ else
+ FREE_LOCK(ump);
inodedep = malloc(sizeof(struct inodedep),
M_INODEDEP, M_SOFTDEP_FLAGS);
workitem_alloc(&inodedep->id_list, D_INODEDEP, mp);
- ACQUIRE_LOCK(&lk);
- if (inodedep_find(inodedephd, fs, inum, inodedeppp)) {
+ ACQUIRE_LOCK(ump);
+ if (inodedep_find(inodedephd, inum, inodedeppp)) {
WORKITEM_FREE(inodedep, D_INODEDEP);
return (1);
}
@@ -2203,15 +2252,12 @@
/*
* Structures and routines associated with newblk caching.
*/
-LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
-u_long newblk_hash; /* size of hash table - 1 */
-#define NEWBLK_HASH(fs, inum) \
- (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
+#define NEWBLK_HASH(ump, inum) \
+ (&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size])
static int
-newblk_find(newblkhd, mp, newblkno, flags, newblkpp)
+newblk_find(newblkhd, newblkno, flags, newblkpp)
struct newblk_hashhead *newblkhd;
- struct mount *mp;
ufs2_daddr_t newblkno;
int flags;
struct newblk **newblkpp;
@@ -2221,8 +2267,6 @@
LIST_FOREACH(newblk, newblkhd, nb_hash) {
if (newblkno != newblk->nb_newblkno)
continue;
- if (mp != newblk->nb_list.wk_mp)
- continue;
/*
* If we're creating a new dependency don't match those that
* have already been converted to allocdirects. This is for
@@ -2254,18 +2298,26 @@
{
struct newblk *newblk;
struct newblk_hashhead *newblkhd;
+ struct ufsmount *ump;
- newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno);
- if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp))
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ newblkhd = NEWBLK_HASH(ump, newblkno);
+ if (newblk_find(newblkhd, newblkno, flags, newblkpp))
return (1);
if ((flags & DEPALLOC) == 0)
return (0);
- FREE_LOCK(&lk);
+ if (softdep_excess_items(ump, D_NEWBLK) ||
+ softdep_excess_items(ump, D_ALLOCDIRECT) ||
+ softdep_excess_items(ump, D_ALLOCINDIR))
+ schedule_cleanup(mp);
+ else
+ FREE_LOCK(ump);
newblk = malloc(sizeof(union allblk), M_NEWBLK,
M_SOFTDEP_FLAGS | M_ZERO);
workitem_alloc(&newblk->nb_list, D_NEWBLK, mp);
- ACQUIRE_LOCK(&lk);
- if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) {
+ ACQUIRE_LOCK(ump);
+ if (newblk_find(newblkhd, newblkno, flags, newblkpp)) {
WORKITEM_FREE(newblk, D_NEWBLK);
return (1);
}
@@ -2283,10 +2335,8 @@
/*
* Structures and routines associated with freed indirect block caching.
*/
-struct freeworklst *indir_hashtbl;
-u_long indir_hash; /* size of hash table - 1 */
-#define INDIR_HASH(mp, blkno) \
- (&indir_hashtbl[((((register_t)(mp)) >> 13) + (blkno)) & indir_hash])
+#define INDIR_HASH(ump, blkno) \
+ (&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size])
/*
* Lookup an indirect block in the indir hash table. The freework is
@@ -2299,14 +2349,14 @@
ufs2_daddr_t blkno;
{
struct freework *freework;
- struct freeworklst *wkhd;
+ struct indir_hashhead *wkhd;
+ struct ufsmount *ump;
- wkhd = INDIR_HASH(mp, blkno);
+ ump = VFSTOUFS(mp);
+ wkhd = INDIR_HASH(ump, blkno);
TAILQ_FOREACH(freework, wkhd, fw_next) {
if (freework->fw_blkno != blkno)
continue;
- if (freework->fw_list.wk_mp != mp)
- continue;
indirblk_remove(freework);
return (1);
}
@@ -2324,15 +2374,17 @@
{
struct jblocks *jblocks;
struct jseg *jseg;
+ struct ufsmount *ump;
- jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
+ jblocks = ump->softdep_jblocks;
jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst);
if (jseg == NULL)
return;
LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs);
- TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp,
- freework->fw_blkno), freework, fw_next);
+ TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework,
+ fw_next);
freework->fw_state &= ~DEPCOMPLETE;
}
@@ -2340,10 +2392,11 @@
indirblk_remove(freework)
struct freework *freework;
{
+ struct ufsmount *ump;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
LIST_REMOVE(freework, fw_segs);
- TAILQ_REMOVE(INDIR_HASH(freework->fw_list.wk_mp,
- freework->fw_blkno), freework, fw_next);
+ TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next);
freework->fw_state |= DEPCOMPLETE;
if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
WORKITEM_FREE(freework, D_FREEWORK);
@@ -2356,20 +2409,13 @@
void
softdep_initialize()
{
- int i;
- LIST_INIT(&mkdirlisthd);
+ TAILQ_INIT(&softdepmounts);
+#ifdef __LP64__
max_softdeps = desiredvnodes * 4;
- pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash);
- inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash);
- newblk_hashtbl = hashinit(desiredvnodes / 5, M_NEWBLK, &newblk_hash);
- bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash);
- i = 1 << (ffs(desiredvnodes / 10) - 1);
- indir_hashtbl = malloc(i * sizeof(indir_hashtbl[0]), M_FREEWORK,
- M_WAITOK);
- indir_hash = i - 1;
- for (i = 0; i <= indir_hash; i++)
- TAILQ_INIT(&indir_hashtbl[i]);
+#else
+ max_softdeps = desiredvnodes * 2;
+#endif
/* initialise bioops hack */
bioops.io_start = softdep_disk_io_initiation;
@@ -2376,6 +2422,7 @@
bioops.io_complete = softdep_disk_write_complete;
bioops.io_deallocate = softdep_deallocate_dependencies;
bioops.io_countdeps = softdep_count_dependencies;
+ softdep_ast_cleanup = softdep_ast_cleanup_proc;
/* Initialize the callout with an mtx. */
callout_init_mtx(&softdep_callout, &lk, 0);
@@ -2389,12 +2436,14 @@
softdep_uninitialize()
{
+ /* clear bioops hack */
+ bioops.io_start = NULL;
+ bioops.io_complete = NULL;
+ bioops.io_deallocate = NULL;
+ bioops.io_countdeps = NULL;
+ softdep_ast_cleanup = NULL;
+
callout_drain(&softdep_callout);
- hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash);
- hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash);
- hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash);
- hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash);
- free(indir_hashtbl, M_FREEWORK);
}
/*
@@ -2409,11 +2458,14 @@
struct ucred *cred;
{
struct csum_total cstotal;
+ struct mount_softdeps *sdp;
struct ufsmount *ump;
struct cg *cgp;
struct buf *bp;
- int error, cyl;
+ int i, error, cyl;
+ sdp = malloc(sizeof(struct mount_softdeps), M_MOUNTDATA,
+ M_WAITOK | M_ZERO);
MNT_ILOCK(mp);
mp->mnt_flag = (mp->mnt_flag & ~MNT_ASYNC) | MNT_SOFTDEP;
if ((mp->mnt_kern_flag & MNTK_SOFTDEP) == 0) {
@@ -2420,8 +2472,11 @@
mp->mnt_kern_flag = (mp->mnt_kern_flag & ~MNTK_ASYNC) |
MNTK_SOFTDEP | MNTK_NOASYNC;
}
+ ump = VFSTOUFS(mp);
+ ump->um_softdep = sdp;
MNT_IUNLOCK(mp);
- ump = VFSTOUFS(mp);
+ rw_init(LOCK_PTR(ump), "Per-Filesystem Softdep Lock");
+ sdp->sd_ump = ump;
LIST_INIT(&ump->softdep_workitem_pending);
LIST_INIT(&ump->softdep_journal_pending);
TAILQ_INIT(&ump->softdep_unlinked);
@@ -2429,12 +2484,48 @@
ump->softdep_worklist_tail = NULL;
ump->softdep_on_worklist = 0;
ump->softdep_deps = 0;
+ LIST_INIT(&ump->softdep_mkdirlisthd);
+ ump->pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP,
+ &ump->pagedep_hash_size);
+ ump->pagedep_nextclean = 0;
+ ump->inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP,
+ &ump->inodedep_hash_size);
+ ump->inodedep_nextclean = 0;
+ ump->newblk_hashtbl = hashinit(max_softdeps / 2, M_NEWBLK,
+ &ump->newblk_hash_size);
+ ump->bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP,
+ &ump->bmsafemap_hash_size);
+ i = 1 << (ffs(desiredvnodes / 10) - 1);
+ ump->indir_hashtbl = malloc(i * sizeof(struct indir_hashhead),
+ M_FREEWORK, M_WAITOK);
+ ump->indir_hash_size = i - 1;
+ for (i = 0; i <= ump->indir_hash_size; i++)
+ TAILQ_INIT(&ump->indir_hashtbl[i]);
+ ACQUIRE_GBLLOCK(&lk);
+ TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
+ FREE_GBLLOCK(&lk);
if ((fs->fs_flags & FS_SUJ) &&
(error = journal_mount(mp, fs, cred)) != 0) {
printf("Failed to start journal: %d\n", error);
+ softdep_unmount(mp);
return (error);
}
/*
+ * Start our flushing thread in the bufdaemon process.
+ */
+ ACQUIRE_LOCK(ump);
+ ump->softdep_flags |= FLUSH_STARTING;
+ FREE_LOCK(ump);
+ kproc_kthread_add(&softdep_flush, mp, &bufdaemonproc,
+ &ump->softdep_flushtd, 0, 0, "softdepflush", "%s worker",
+ mp->mnt_stat.f_mntonname);
+ ACQUIRE_LOCK(ump);
+ while ((ump->softdep_flags & FLUSH_STARTING) != 0) {
+ msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, "sdstart",
+ hz / 2);
+ }
+ FREE_LOCK(ump);
+ /*
* When doing soft updates, the counters in the
* superblock may have gotten out of sync. Recomputation
* can take a long time and can be deferred for background
@@ -2449,6 +2540,7 @@
if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
fs->fs_cgsize, cred, &bp)) != 0) {
brelse(bp);
+ softdep_unmount(mp);
return (error);
}
cgp = (struct cg *)bp->b_data;
@@ -2471,16 +2563,56 @@
softdep_unmount(mp)
struct mount *mp;
{
+ struct ufsmount *ump;
+#ifdef INVARIANTS
+ int i;
+#endif
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_unmount called on non-softdep filesystem"));
+ ump = VFSTOUFS(mp);
MNT_ILOCK(mp);
mp->mnt_flag &= ~MNT_SOFTDEP;
if (MOUNTEDSUJ(mp) == 0) {
MNT_IUNLOCK(mp);
- return;
+ } else {
+ mp->mnt_flag &= ~MNT_SUJ;
+ MNT_IUNLOCK(mp);
+ journal_unmount(ump);
}
- mp->mnt_flag &= ~MNT_SUJ;
- MNT_IUNLOCK(mp);
- journal_unmount(mp);
+ /*
+ * Shut down our flushing thread. The NULL check is needed because
+ * softdep_mount may error out before the thread has been created.
+ */
+ if (ump->softdep_flushtd != NULL) {
+ ACQUIRE_LOCK(ump);
+ ump->softdep_flags |= FLUSH_EXIT;
+ wakeup(&ump->softdep_flushtd);
+ msleep(&ump->softdep_flags, LOCK_PTR(ump), PVM | PDROP,
+ "sdwait", 0);
+ KASSERT((ump->softdep_flags & FLUSH_EXIT) == 0,
+ ("Thread shutdown failed"));
+ }
+ /*
+ * Free up our resources.
+ */
+ ACQUIRE_GBLLOCK(&lk);
+ TAILQ_REMOVE(&softdepmounts, ump->um_softdep, sd_next);
+ FREE_GBLLOCK(&lk);
+ rw_destroy(LOCK_PTR(ump));
+ hashdestroy(ump->pagedep_hashtbl, M_PAGEDEP, ump->pagedep_hash_size);
+ hashdestroy(ump->inodedep_hashtbl, M_INODEDEP, ump->inodedep_hash_size);
+ hashdestroy(ump->newblk_hashtbl, M_NEWBLK, ump->newblk_hash_size);
+ hashdestroy(ump->bmsafemap_hashtbl, M_BMSAFEMAP,
+ ump->bmsafemap_hash_size);
+ free(ump->indir_hashtbl, M_FREEWORK);
+#ifdef INVARIANTS
+ for (i = 0; i <= D_LAST; i++)
+ KASSERT(ump->softdep_curdeps[i] == 0,
+ ("Unmount %s: Dep type %s != 0 (%ld)", ump->um_fs->fs_fsmnt,
+ TYPENAME(i), ump->softdep_curdeps[i]));
+#endif
+ free(ump->um_softdep, M_MOUNTDATA);
}
static struct jblocks *
@@ -2535,9 +2667,10 @@
int bytes;
{
+ LOCK_OWNED(VFSTOUFS(mp));
jblocks->jb_free += bytes / DEV_BSIZE;
if (jblocks->jb_suspended)
- worklist_speedup();
+ worklist_speedup(mp);
wakeup(jblocks);
}
@@ -2628,6 +2761,7 @@
struct ucred *cred;
{
struct jblocks *jblocks;
+ struct ufsmount *ump;
struct vnode *vp;
struct inode *ip;
ufs2_daddr_t blkno;
@@ -2635,6 +2769,12 @@
int error;
int i;
+ ump = VFSTOUFS(mp);
+ ump->softdep_journal_tail = NULL;
+ ump->softdep_on_journal = 0;
+ ump->softdep_accdeps = 0;
+ ump->softdep_req = 0;
+ ump->softdep_jblocks = NULL;
error = softdep_journal_lookup(mp, &vp);
if (error != 0) {
printf("Failed to find journal. Use tunefs to create one\n");
@@ -2659,7 +2799,7 @@
}
jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */
jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */
- VFSTOUFS(mp)->softdep_jblocks = jblocks;
+ ump->softdep_jblocks = jblocks;
out:
if (error == 0) {
MNT_ILOCK(mp);
@@ -2685,12 +2825,10 @@
}
static void
-journal_unmount(mp)
- struct mount *mp;
+journal_unmount(ump)
+ struct ufsmount *ump;
{
- struct ufsmount *ump;
- ump = VFSTOUFS(mp);
if (ump->softdep_jblocks)
jblocks_destroy(ump->softdep_jblocks);
ump->softdep_jblocks = NULL;
@@ -2707,8 +2845,8 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
+ LOCK_OWNED(ump);
if (wk->wk_state & ONWORKLIST)
panic("add_to_journal: %s(0x%X) already on list",
TYPENAME(wk->wk_type), wk->wk_state);
@@ -2733,8 +2871,8 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
+ LOCK_OWNED(ump);
#ifdef SUJ_DEBUG
{
struct worklist *wkn;
@@ -2771,7 +2909,7 @@
int thresh;
{
struct jblocks *jblocks;
- int avail;
+ int limit, avail;
jblocks = ump->softdep_jblocks;
if (jblocks == NULL)
@@ -2779,8 +2917,12 @@
/*
* We use a tighter restriction here to prevent request_cleanup()
* running in threads from running into locks we currently hold.
+ * We have to be over the limit and our filesystem has to be
+ * responsible for more than our share of that usage.
*/
- if (dep_current[D_INODEDEP] > (max_softdeps / 10) * 9)
+ limit = (max_softdeps / 10) * 9;
+ if (dep_current[D_INODEDEP] > limit &&
+ ump->softdep_curdeps[D_INODEDEP] > limit / stat_flush_threads)
return (0);
if (thresh)
thresh = jblocks->jb_min;
@@ -2805,7 +2947,7 @@
if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
stat_journal_min++;
mp->mnt_kern_flag |= MNTK_SUSPEND;
- mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc);
+ mp->mnt_susp_owner = ump->softdep_flushtd;
}
jblocks->jb_suspended = 1;
MNT_IUNLOCK(mp);
@@ -2823,10 +2965,10 @@
if (jblocks != NULL && jblocks->jb_suspended &&
journal_space(ump, jblocks->jb_min)) {
jblocks->jb_suspended = 0;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
mp->mnt_susp_owner = curthread;
- vfs_write_resume(mp);
- ACQUIRE_LOCK(&lk);
+ vfs_write_resume(mp, 0);
+ ACQUIRE_LOCK(ump);
return (1);
}
return (0);
@@ -2851,21 +2993,26 @@
{
struct ufsmount *ump;
+ KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
+ ("softdep_prealloc called on non-softdep filesystem"));
/*
* Nothing to do if we are not running journaled soft updates.
- * If we currently hold the snapshot lock, we must avoid handling
- * other resources that could cause deadlock.
+ * If we currently hold the snapshot lock, we must avoid
+ * handling other resources that could cause deadlock. Do not
+ * touch the quotas vnode since it is typically recursed with
+ * other vnode locks held.
*/
- if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)))
+ if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)) ||
+ (vp->v_vflag & VV_SYSTEM) != 0)
return (0);
ump = VFSTOUFS(vp->v_mount);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (journal_space(ump, 0)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
stat_journal_low++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (waitok == MNT_NOWAIT)
return (ENOSPC);
/*
@@ -2874,15 +3021,15 @@
*/
if ((curthread->td_pflags & TDP_COWINPROGRESS) == 0)
ffs_syncvnode(vp, waitok, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
if (journal_space(ump, 0) == 0) {
- softdep_speedup();
+ softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -2901,7 +3048,7 @@
struct ufsmount *ump;
ump = VFSTOUFS(dvp->v_mount);
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ump);
/*
* Nothing to do if we have sufficient journal space.
* If we currently hold the snapshot lock, we must avoid
@@ -2910,11 +3057,11 @@
if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
return;
stat_journal_low++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vp)
ffs_syncvnode(vp, MNT_NOWAIT, 0);
ffs_syncvnode(dvp, MNT_WAIT, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/* Process vp before dvp as it may create .. removes. */
if (vp) {
process_removes(vp);
@@ -2922,10 +3069,10 @@
}
process_removes(dvp);
process_truncates(dvp);
- softdep_speedup();
+ softdep_speedup(ump);
process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
if (journal_space(ump, 0) == 0) {
- softdep_speedup();
+ softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
@@ -3101,12 +3248,12 @@
return;
ump = VFSTOUFS(mp);
jblocks = ump->softdep_jblocks;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
while (ump->softdep_on_journal) {
jblocks->jb_needseg = 1;
softdep_process_journal(mp, NULL, MNT_WAIT);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
static void softdep_synchronize_completed(struct bio *);
@@ -3118,6 +3265,7 @@
{
struct jseg *oldest;
struct jseg *jseg;
+ struct ufsmount *ump;
/*
* caller1 marks the last segment written before we issued the
@@ -3124,8 +3272,13 @@
* synchronize cache.
*/
jseg = bp->bio_caller1;
+ if (jseg == NULL) {
+ g_destroy_bio(bp);
+ return;
+ }
+ ump = VFSTOUFS(jseg->js_list.wk_mp);
+ ACQUIRE_LOCK(ump);
oldest = NULL;
- ACQUIRE_LOCK(&lk);
/*
* Mark all the journal entries waiting on the synchronize cache
* as completed so they may continue on.
@@ -3142,7 +3295,7 @@
if (oldest)
complete_jsegs(oldest);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
g_destroy_bio(bp);
}
@@ -3202,6 +3355,7 @@
bio = NULL;
jseg = NULL;
ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
fs = ump->um_fs;
jblocks = ump->softdep_jblocks;
devbsize = ump->um_devvp->v_bufobj.bo_bsize;
@@ -3232,7 +3386,7 @@
cnt++;
/*
* Verify some free journal space. softdep_prealloc() should
- * guarantee that we don't run out so this is indicative of
+ * guarantee that we don't run out so this is indicative of
* a problem with the flow control. Try to recover
* gracefully in any event.
*/
@@ -3240,10 +3394,10 @@
if (flags != MNT_WAIT)
break;
printf("softdep: Out of journal space!\n");
- softdep_speedup();
- msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
+ softdep_speedup(ump);
+ msleep(jblocks, LOCK_PTR(ump), PRIBIO, "jblocks", hz);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
workitem_alloc(&jseg->js_list, D_JSEG, mp);
LIST_INIT(&jseg->js_entries);
@@ -3255,7 +3409,7 @@
bio = g_alloc_bio();
jseg->js_jblocks = jblocks;
bp = geteblk(fs->fs_bsize, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If there was a race while we were allocating the block
* and jseg the entry we care about was likely written.
@@ -3267,9 +3421,9 @@
if (cnt + jblocks->jb_needseg == 0 || jblocks->jb_free == 0) {
bp->b_flags |= B_INVAL | B_NOCACHE;
WORKITEM_FREE(jseg, D_JSEG);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
brelse(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
/*
@@ -3291,7 +3445,6 @@
bp->b_lblkno = bp->b_blkno;
bp->b_offset = bp->b_blkno * DEV_BSIZE;
bp->b_bcount = size;
- bp->b_bufobj = &ump->um_devvp->v_bufobj;
bp->b_flags &= ~B_INVAL;
bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
/*
@@ -3315,6 +3468,24 @@
*/
data = bp->b_data;
off = 0;
+
+ /*
+ * Always put a header on the first block.
+ * XXX As with the case below, there might be no chance to get
+ * into the loop. Ensure that something valid is written.
+ */
+ jseg_write(ump, jseg, data);
+ off += JREC_SIZE;
+ data = bp->b_data + off;
+
+ /*
+ * XXX Something is wrong here. There's no work to do,
+ * but we need to perform an I/O and allow it to complete
+ * anyway.
+ */
+ if (LIST_EMPTY(&ump->softdep_journal_pending))
+ stat_emptyjblocks++;
+
while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
!= NULL) {
if (cnt == 0)
@@ -3364,6 +3535,11 @@
data = bp->b_data + off;
cnt--;
}
+
+ /* Clear any remaining space so we don't leak kernel data */
+ if (size > off)
+ bzero(data, size - off);
+
/*
* Write this one buffer and continue.
*/
@@ -3370,10 +3546,8 @@
segwritten = 1;
jblocks->jb_needseg = 0;
WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
- FREE_LOCK(&lk);
- BO_LOCK(bp->b_bufobj);
- bgetvp(ump->um_devvp, bp);
- BO_UNLOCK(bp->b_bufobj);
+ FREE_LOCK(ump);
+ pbgetvp(ump->um_devvp, bp);
/*
* We only do the blocking wait once we find the journal
* entry we're looking for.
@@ -3382,7 +3556,7 @@
bwrite(bp);
else
bawrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
* If we wrote a segment issue a synchronize cache so the journal
@@ -3403,10 +3577,10 @@
if (flags == 0 && jblocks->jb_suspended) {
if (journal_unsuspend(ump))
return;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
VFS_SYNC(mp, MNT_NOWAIT);
ffs_sbupdate(ump, MNT_WAIT, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
}
@@ -3421,7 +3595,6 @@
{
struct worklist *wk;
struct jmvref *jmvref;
- int waiting;
#ifdef INVARIANTS
int i = 0;
#endif
@@ -3428,8 +3601,7 @@
while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) {
WORKLIST_REMOVE(wk);
- waiting = wk->wk_state & IOWAITING;
- wk->wk_state &= ~(INPROGRESS | IOWAITING);
+ wk->wk_state &= ~INPROGRESS;
wk->wk_state |= COMPLETE;
KASSERT(i++ < jseg->js_cnt,
("handle_written_jseg: overflow %d >= %d",
@@ -3470,8 +3642,6 @@
TYPENAME(wk->wk_type));
/* NOTREACHED */
}
- if (waiting)
- wakeup(wk);
}
/* Release the self reference so the structure may be freed. */
rele_jseg(jseg);
@@ -3528,6 +3698,7 @@
* discarded.
*/
bp->b_flags |= B_INVAL | B_NOCACHE;
+ pbrelvp(bp);
complete_jsegs(jseg);
}
@@ -3932,8 +4103,8 @@
/*
* Allocate a new freework structure that may be a level in an indirect
* when parent is not NULL or a top level block when it is. The top level
- * freework structures are allocated without lk held and before the freeblks
- * is visible outside of softdep_setup_freeblocks().
+ * freework structures are allocated without the per-filesystem lock held
+ * and before the freeblks is visible outside of softdep_setup_freeblocks().
*/
static struct freework *
newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal)
@@ -3964,10 +4135,10 @@
if (journal)
newjfreeblk(freeblks, lbn, nb, frags);
if (parent == NULL) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
freeblks->fb_ref++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
return (freework);
@@ -4001,7 +4172,8 @@
/*
* Allocate a new jfreeblk to journal top level block pointer when truncating
- * a file. The caller must add this to the worklist when lk is held.
+ * a file. The caller must add this to the worklist when the per-filesystem
+ * lock is held.
*/
static struct jfreeblk *
newjfreeblk(freeblks, lbn, blkno, frags)
@@ -4027,6 +4199,33 @@
}
/*
+ * The journal is only prepared to handle full-size block numbers, so we
+ * have to adjust the record to reflect the change to a full-size block.
+ * For example, suppose we have a block made up of fragments 8-15 and
+ * want to free its last two fragments. We are given a request that says:
+ * FREEBLK ino=5, blkno=14, lbn=0, frags=2, oldfrags=0
+ * where frags are the number of fragments to free and oldfrags are the
+ * number of fragments to keep. To block align it, we have to change it to
+ * have a valid full-size blkno, so it becomes:
+ * FREEBLK ino=5, blkno=8, lbn=0, frags=2, oldfrags=6
+ */
+static void
+adjust_newfreework(freeblks, frag_offset)
+ struct freeblks *freeblks;
+ int frag_offset;
+{
+ struct jfreeblk *jfreeblk;
+
+ KASSERT((LIST_FIRST(&freeblks->fb_jblkdephd) != NULL &&
+ LIST_FIRST(&freeblks->fb_jblkdephd)->jb_list.wk_type == D_JFREEBLK),
+ ("adjust_newfreework: Missing freeblks dependency"));
+
+ jfreeblk = WK_JFREEBLK(LIST_FIRST(&freeblks->fb_jblkdephd));
+ jfreeblk->jf_blkno -= frag_offset;
+ jfreeblk->jf_frags += frag_offset;
+}
+
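Plugging the comment's example into this routine: the caller passes frag_offset = 6 (the six retained fragments 8-13), so the code rewinds jf_blkno from 14 down to the block-aligned 8 (jf_blkno -= 6) and widens jf_frags by those same 6 fragments (jf_frags += 6), keeping the journal record aligned to a full-size block boundary.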
+/*
* Allocate a new jtrunc to track a partial truncation.
*/
static struct jtrunc *
@@ -4389,6 +4588,7 @@
int waitfor;
{
+ LOCK_OWNED(VFSTOUFS(wk->wk_mp));
/*
* Blocking journal waits cause slow synchronous behavior. Record
* stats on the frequency of these blocking operations.
@@ -4442,14 +4642,10 @@
struct inode *ip;
{
struct inodedep *inodedep;
- int dflags;
KASSERT(ip->i_nlink >= ip->i_effnlink,
("inodedep_lookup_ip: bad delta"));
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags,
+ (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC,
&inodedep);
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked"));
@@ -4472,10 +4668,12 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_create called on non-softdep filesystem"));
KASSERT(ip->i_nlink == 1,
("softdep_setup_create: Invalid link count."));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4484,7 +4682,7 @@
("softdep_setup_create: No addref structure present."));
}
softdep_prelink(dvp, NULL);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4501,10 +4699,10 @@
struct inodedep *inodedep;
struct jaddref *jaddref;
struct vnode *dvp;
- struct vnode *vp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_dotdot_link called on non-softdep filesystem"));
dvp = ITOV(dp);
- vp = ITOV(ip);
jaddref = NULL;
/*
* We don't set MKDIR_PARENT as this is not tied to a mkdir and
@@ -4513,13 +4711,13 @@
if (DOINGSUJ(dvp))
jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET,
dp->i_effnlink - 1, dp->i_mode);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(dp);
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4537,18 +4735,20 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_link called on non-softdep filesystem"));
dvp = ITOV(dp);
jaddref = NULL;
if (DOINGSUJ(dvp))
jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1,
ip->i_mode);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4568,6 +4768,8 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_mkdir called on non-softdep filesystem"));
dvp = ITOV(dp);
dotaddref = dotdotaddref = NULL;
if (DOINGSUJ(dvp)) {
@@ -4578,7 +4780,7 @@
dp->i_effnlink - 1, dp->i_mode);
dotdotaddref->ja_state |= MKDIR_PARENT;
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4586,8 +4788,8 @@
KASSERT(jaddref != NULL,
("softdep_setup_mkdir: No addref structure present."));
KASSERT(jaddref->ja_parent == dp->i_number,
- ("softdep_setup_mkdir: bad parent %d",
- jaddref->ja_parent));
+ ("softdep_setup_mkdir: bad parent %ju",
+ (uintmax_t)jaddref->ja_parent));
TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref,
if_deps);
}
@@ -4596,7 +4798,7 @@
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst,
&dotdotaddref->ja_ref, if_deps);
softdep_prelink(ITOV(dp), NULL);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4610,12 +4812,14 @@
{
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_rmdir called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4629,12 +4833,14 @@
{
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_unlink called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4650,8 +4856,10 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_create called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4660,36 +4868,10 @@
("softdep_revert_create: addref parent mismatch"));
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
- * Called to release the journal structures created by a failed dotdot link
- * creation. Adjusts nlinkdelta for non-journaling softdep.
- */
-void
-softdep_revert_dotdot_link(dp, ip)
- struct inode *dp;
- struct inode *ip;
-{
- struct inodedep *inodedep;
- struct jaddref *jaddref;
- struct vnode *dvp;
-
- dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
- inodedep = inodedep_lookup_ip(dp);
- if (DOINGSUJ(dvp)) {
- jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
- inoreflst);
- KASSERT(jaddref->ja_parent == ip->i_number,
- ("softdep_revert_dotdot_link: addref parent mismatch"));
- cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
- }
- FREE_LOCK(&lk);
-}
-
-/*
* Called to release the journal structures created by a failed link
* addition. Adjusts nlinkdelta for non-journaling softdep.
*/
@@ -4702,8 +4884,10 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_link called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4712,7 +4896,7 @@
("softdep_revert_link: addref parent mismatch"));
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4729,9 +4913,11 @@
struct jaddref *dotaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_mkdir called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(dp);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4753,7 +4939,7 @@
("softdep_revert_mkdir: dot addref parent mismatch"));
cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4765,10 +4951,12 @@
struct inode *ip;
{
- ACQUIRE_LOCK(&lk);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_rmdir called on non-softdep filesystem"));
+ ACQUIRE_LOCK(dp->i_ump);
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4820,6 +5008,8 @@
struct fs *fs;
mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_inomapdep called on non-softdep filesystem"));
fs = ip->i_ump->um_fs;
jaddref = NULL;
@@ -4852,8 +5042,8 @@
bmsafemap = malloc(sizeof(struct bmsafemap),
M_BMSAFEMAP, M_SOFTDEP_FLAGS);
workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
- ACQUIRE_LOCK(&lk);
- if ((inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep)))
+ ACQUIRE_LOCK(ip->i_ump);
+ if ((inodedep_lookup(mp, newinum, DEPALLOC, &inodedep)))
panic("softdep_setup_inomapdep: dependency %p for new"
"inode already exists", inodedep);
bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum), bmsafemap);
@@ -4867,7 +5057,7 @@
}
inodedep->id_bmsafemap = bmsafemap;
inodedep->id_state &= ~DEPCOMPLETE;
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -4885,9 +5075,13 @@
struct newblk *newblk;
struct bmsafemap *bmsafemap;
struct jnewblk *jnewblk;
+ struct ufsmount *ump;
struct fs *fs;
- fs = VFSTOUFS(mp)->um_fs;
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_blkmapdep called on non-softdep filesystem"));
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
jnewblk = NULL;
/*
* Create a dependency for the newly allocated block.
@@ -4930,7 +5124,7 @@
CTR3(KTR_SUJ,
"softdep_setup_blkmapdep: blkno %jd frags %d oldfrags %d",
newblkno, frags, oldfrags);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0)
panic("softdep_setup_blkmapdep: found block");
newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp,
@@ -4944,16 +5138,15 @@
}
newblk->nb_bmsafemap = bmsafemap;
newblk->nb_jnewblk = jnewblk;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
-#define BMSAFEMAP_HASH(fs, cg) \
- (&bmsafemap_hashtbl[((((register_t)(fs)) >> 13) + (cg)) & bmsafemap_hash])
+#define BMSAFEMAP_HASH(ump, cg) \
+ (&(ump)->bmsafemap_hashtbl[(cg) & (ump)->bmsafemap_hash_size])
static int
-bmsafemap_find(bmsafemaphd, mp, cg, bmsafemapp)
+bmsafemap_find(bmsafemaphd, cg, bmsafemapp)
struct bmsafemap_hashhead *bmsafemaphd;
- struct mount *mp;
int cg;
struct bmsafemap **bmsafemapp;
{
@@ -4960,7 +5153,7 @@
struct bmsafemap *bmsafemap;
LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash)
- if (bmsafemap->sm_list.wk_mp == mp && bmsafemap->sm_cg == cg)
+ if (bmsafemap->sm_cg == cg)
break;
if (bmsafemap) {
*bmsafemapp = bmsafemap;
@@ -4989,19 +5182,20 @@
struct bmsafemap_hashhead *bmsafemaphd;
struct bmsafemap *bmsafemap, *collision;
struct worklist *wk;
- struct fs *fs;
+ struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
- if (bp)
- LIST_FOREACH(wk, &bp->b_dep, wk_list)
- if (wk->wk_type == D_BMSAFEMAP) {
- if (newbmsafemap)
- WORKITEM_FREE(newbmsafemap,D_BMSAFEMAP);
- return (WK_BMSAFEMAP(wk));
- }
- fs = VFSTOUFS(mp)->um_fs;
- bmsafemaphd = BMSAFEMAP_HASH(fs, cg);
- if (bmsafemap_find(bmsafemaphd, mp, cg, &bmsafemap) == 1) {
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ KASSERT(bp != NULL, ("bmsafemap_lookup: missing buffer"));
+ LIST_FOREACH(wk, &bp->b_dep, wk_list) {
+ if (wk->wk_type == D_BMSAFEMAP) {
+ if (newbmsafemap)
+ WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP);
+ return (WK_BMSAFEMAP(wk));
+ }
+ }
+ bmsafemaphd = BMSAFEMAP_HASH(ump, cg);
+ if (bmsafemap_find(bmsafemaphd, cg, &bmsafemap) == 1) {
if (newbmsafemap)
WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP);
return (bmsafemap);
@@ -5009,11 +5203,11 @@
if (newbmsafemap) {
bmsafemap = newbmsafemap;
} else {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bmsafemap = malloc(sizeof(struct bmsafemap),
M_BMSAFEMAP, M_SOFTDEP_FLAGS);
workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
bmsafemap->sm_buf = bp;
LIST_INIT(&bmsafemap->sm_inodedephd);
@@ -5024,13 +5218,13 @@
LIST_INIT(&bmsafemap->sm_jnewblkhd);
LIST_INIT(&bmsafemap->sm_freehd);
LIST_INIT(&bmsafemap->sm_freewr);
- if (bmsafemap_find(bmsafemaphd, mp, cg, &collision) == 1) {
+ if (bmsafemap_find(bmsafemaphd, cg, &collision) == 1) {
WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
return (collision);
}
bmsafemap->sm_cg = cg;
LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash);
- LIST_INSERT_HEAD(&VFSTOUFS(mp)->softdep_dirtycg, bmsafemap, sm_next);
+ LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next);
WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
return (bmsafemap);
}
@@ -5086,6 +5280,8 @@
lbn = bp->b_lblkno;
mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_allocdirect called on non-softdep filesystem"));
if (oldblkno && oldblkno != newblkno)
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
else
@@ -5095,7 +5291,7 @@
"softdep_setup_allocdirect: ino %d blkno %jd oldblkno %jd "
"off %jd newsize %ld oldsize %d",
ip->i_number, newblkno, oldblkno, off, newsize, oldsize);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (off >= NDADDR) {
if (lbn > 0)
panic("softdep_setup_allocdirect: bad lbn %jd, off %jd",
@@ -5125,7 +5321,7 @@
/*
* Convert the newblk to an allocdirect.
*/
- newblk->nb_list.wk_type = D_ALLOCDIRECT;
+ WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT);
adp = (struct allocdirect *)newblk;
newblk->nb_freefrag = freefrag;
adp->ad_offset = off;
@@ -5144,7 +5340,7 @@
if (freefrag && freefrag->ff_jdep != NULL &&
freefrag->ff_jdep->wk_type == D_JFREEFRAG)
add_to_journal(freefrag->ff_jdep);
- inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
adp->ad_inodedep = inodedep;
WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list);
@@ -5167,7 +5363,7 @@
TAILQ_INSERT_TAIL(adphead, adp, ad_next);
if (oldadp != NULL && oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return;
}
TAILQ_FOREACH(oldadp, adphead, ad_next) {
@@ -5181,7 +5377,7 @@
if (oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -5261,7 +5457,7 @@
struct freefrag *freefrag;
freefrag = NULL;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(newadp->ad_list.wk_mp));
if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
newadp->ad_oldsize != oldadp->ad_newsize ||
newadp->ad_offset >= NDADDR)
@@ -5419,7 +5615,7 @@
* safe to modify the list head here.
*/
LIST_INIT(&wkhd);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list);
/*
* If the journal has not been written we must cancel it here.
@@ -5430,12 +5626,12 @@
freefrag->ff_jdep->wk_type);
cancel_jnewblk(WK_JNEWBLK(freefrag->ff_jdep), &wkhd);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno,
freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKITEM_FREE(freefrag, D_FREEFRAG);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -5462,18 +5658,19 @@
struct mount *mp;
ufs_lbn_t lbn;
- if (off >= NXADDR)
- panic("softdep_setup_allocext: lbn %lld > NXADDR",
- (long long)off);
+ mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_allocext called on non-softdep filesystem"));
+ KASSERT(off < NXADDR, ("softdep_setup_allocext: lbn %lld > NXADDR",
+ (long long)off));
lbn = bp->b_lblkno;
- mp = UFSTOVFS(ip->i_ump);
if (oldblkno && oldblkno != newblkno)
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
else
freefrag = NULL;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (newblk_lookup(mp, newblkno, 0, &newblk) == 0)
panic("softdep_setup_allocext: lost block");
KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
@@ -5481,7 +5678,7 @@
/*
* Convert the newblk to an allocdirect.
*/
- newblk->nb_list.wk_type = D_ALLOCDIRECT;
+ WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT);
adp = (struct allocdirect *)newblk;
newblk->nb_freefrag = freefrag;
adp->ad_offset = off;
@@ -5501,7 +5698,7 @@
if (freefrag && freefrag->ff_jdep != NULL &&
freefrag->ff_jdep->wk_type == D_JFREEFRAG)
add_to_journal(freefrag->ff_jdep);
- inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
adp->ad_inodedep = inodedep;
WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list);
@@ -5524,7 +5721,7 @@
TAILQ_INSERT_TAIL(adphead, adp, ad_next);
if (oldadp != NULL && oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return;
}
TAILQ_FOREACH(oldadp, adphead, ad_next) {
@@ -5537,7 +5734,7 @@
TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
if (oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -5585,12 +5782,12 @@
freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize, lbn);
else
freefrag = NULL;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (newblk_lookup(UFSTOVFS(ip->i_ump), newblkno, 0, &newblk) == 0)
panic("new_allocindir: lost block");
KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
("newallocindir: newblk already initialized"));
- newblk->nb_list.wk_type = D_ALLOCINDIR;
+ WORKITEM_REASSIGN(newblk, D_ALLOCINDIR);
newblk->nb_freefrag = freefrag;
aip = (struct allocindir *)newblk;
aip->ai_offset = ptrno;
@@ -5626,21 +5823,19 @@
struct allocindir *aip;
struct pagedep *pagedep;
struct mount *mp;
- int dflags;
- if (lbn != nbp->b_lblkno)
- panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
- lbn, bp->b_lblkno);
+ mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_allocindir_page called on non-softdep filesystem"));
+ KASSERT(lbn == nbp->b_lblkno,
+ ("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
+ lbn, bp->b_lblkno));
CTR4(KTR_SUJ,
"softdep_setup_allocindir_page: ino %d blkno %jd oldblkno %jd "
"lbn %jd", ip->i_number, newblkno, oldblkno, lbn);
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page");
- mp = UFSTOVFS(ip->i_ump);
aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
/*
* If we are allocating a directory page, then we must
* allocate an associated pagedep to track additions and
@@ -5650,7 +5845,7 @@
pagedep_lookup(mp, nbp, ip->i_number, lbn, DEPALLOC, &pagedep);
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
freefrag = setup_allocindir_phase2(bp, ip, inodedep, aip, lbn);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
if (freefrag)
handle_workitem_freefrag(freefrag);
}
@@ -5670,8 +5865,9 @@
struct inodedep *inodedep;
struct allocindir *aip;
ufs_lbn_t lbn;
- int dflags;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_setup_allocindir_meta called on non-softdep filesystem"));
CTR3(KTR_SUJ,
"softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d",
ip->i_number, newblkno, ptrno);
@@ -5678,14 +5874,12 @@
lbn = nbp->b_lblkno;
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
aip = newallocindir(ip, ptrno, newblkno, 0, lbn);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
+ inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC,
+ &inodedep);
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn))
panic("softdep_setup_allocindir_meta: Block already existed");
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
static void
@@ -5718,11 +5912,13 @@
{
struct indirdep *indirdep, *newindirdep;
struct newblk *newblk;
+ struct ufsmount *ump;
struct worklist *wk;
struct fs *fs;
ufs2_daddr_t blkno;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
indirdep = NULL;
newindirdep = NULL;
fs = ip->i_fs;
@@ -5742,7 +5938,7 @@
if (indirdep == NULL && newindirdep != NULL)
break;
/* None found and no new structure available. */
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
newindirdep = malloc(sizeof(struct indirdep),
M_INDIRDEP, M_SOFTDEP_FLAGS);
workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp);
@@ -5766,7 +5962,7 @@
newindirdep->ir_bp = bp;
BUF_KERNPROC(newindirdep->ir_savebp);
bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
indirdep = newindirdep;
WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
@@ -5801,7 +5997,7 @@
struct freefrag *freefrag;
struct mount *mp;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ip->i_ump);
mp = UFSTOVFS(ip->i_ump);
fs = ip->i_fs;
if (bp->b_lblkno >= 0)
@@ -6056,7 +6252,7 @@
* allocations from proceeding until we are finished with the
* truncate and the block is written.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
indirdep = indirdep_lookup(mp, ip, bp);
if (indirdep->ir_freeblks)
panic("setup_trunc_indir: indirdep already truncated.");
@@ -6073,7 +6269,7 @@
trunc_indirdep(indirn, freeblks, bp, off);
} else
trunc_indirdep(indirdep, freeblks, bp, off);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
/*
* Creation is protected by the buf lock. The saveddata is only
* needed if a full truncation follows a partial truncation but it
@@ -6116,10 +6312,13 @@
{
struct freework *fwn;
struct indirdep *indirdep;
+ struct ufsmount *ump;
struct buf *bp;
uintptr_t start;
int count;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
+ LOCK_OWNED(ump);
indirdep = freework->fw_indir;
for (;;) {
bp = indirdep->ir_bp;
@@ -6129,12 +6328,11 @@
/* Inline part of getdirtybuf(). We don't want bremfree. */
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0)
break;
- if (BUF_LOCK(bp,
- LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, &lk) == 0)
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+ LOCK_PTR(ump)) == 0)
BUF_UNLOCK(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
- mtx_assert(&lk, MA_OWNED);
freework->fw_state |= DEPCOMPLETE;
TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next);
/*
@@ -6195,7 +6393,7 @@
* Calculate the number of blocks we are going to release where datablocks
* is the current total and length is the new file size.
*/
-ufs2_daddr_t
+static ufs2_daddr_t
blkcount(fs, datablocks, length)
struct fs *fs;
ufs2_daddr_t datablocks;
@@ -6283,6 +6481,7 @@
struct inodedep *inodedep;
struct jblkdep *jblkdep;
struct allocdirect *adp, *adpn;
+ struct ufsmount *ump;
struct fs *fs;
struct buf *bp;
struct vnode *vp;
@@ -6289,10 +6488,13 @@
struct mount *mp;
ufs2_daddr_t extblocks, datablocks;
ufs_lbn_t tmpval, lbn, lastlbn;
- int frags, lastoff, iboff, allocblock, needj, dflags, error, i;
+ int frags, lastoff, iboff, allocblock, needj, error, i;
fs = ip->i_fs;
- mp = UFSTOVFS(ip->i_ump);
+ ump = ip->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_journal_freeblocks called on non-softdep filesystem"));
vp = ITOV(ip);
needj = 1;
iboff = -1;
@@ -6301,22 +6503,19 @@
datablocks = 0;
frags = 0;
freeblks = newfreeblks(mp, ip);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If we're truncating a removed file that will never be written
* we don't need to journal the block frees. The canceled journals
* for the allocations will suffice.
*/
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED &&
length == 0)
needj = 0;
CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d",
ip->i_number, length, needj);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* Calculate the lbn that we are truncating to. This results in -1
* if we're truncating to 0 bytes. So it is the last lbn we want
@@ -6375,8 +6574,11 @@
oldfrags -= frags;
oldfrags = numfrags(ip->i_fs, oldfrags);
blkno += numfrags(ip->i_fs, frags);
- newfreework(ip->i_ump, freeblks, NULL, lastlbn,
+ newfreework(ump, freeblks, NULL, lastlbn,
blkno, oldfrags, 0, needj);
+ if (needj)
+ adjust_newfreework(freeblks,
+ numfrags(ip->i_fs, frags));
} else if (blkno == 0)
allocblock = 1;
}
@@ -6408,9 +6610,9 @@
(void) chkdq(ip, -datablocks, NOCRED, 0);
#endif
freeblks->fb_chkcnt = -datablocks;
- UFS_LOCK(ip->i_ump);
+ UFS_LOCK(ump);
fs->fs_pendingblocks += datablocks;
- UFS_UNLOCK(ip->i_ump);
+ UFS_UNLOCK(ump);
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks);
/*
* Handle truncation of incomplete alloc direct dependencies. We
@@ -6430,14 +6632,14 @@
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
softdep_update_inodeblock(ip, bp, 0);
- if (ip->i_ump->um_fstype == UFS1)
+ if (ump->um_fstype == UFS1)
*((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
else
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
- ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ ACQUIRE_LOCK(ump);
+ (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
@@ -6465,7 +6667,7 @@
}
}
if ((flags & IO_EXT) != 0)
- while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
+ while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
cancel_allocdirect(&inodedep->id_extupdt, adp,
freeblks);
/*
@@ -6488,7 +6690,7 @@
*/
LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps)
add_to_journal(&jblkdep->jb_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bdwrite(bp);
/*
* Truncate dependency structures beyond length.
@@ -6529,8 +6731,8 @@
bawrite(bp);
}
- ACQUIRE_LOCK(&lk);
- inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ ACQUIRE_LOCK(ump);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next);
freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST;
/*
@@ -6545,7 +6747,7 @@
freeblks->fb_state |= INPROGRESS;
else
freeblks = NULL;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (freeblks)
handle_workitem_freeblocks(freeblks, 0);
trunc_pages(ip, length, extblocks, flags);
@@ -6561,6 +6763,8 @@
{
struct jfsync *jfsync;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_journal_fsync called on non-softdep filesystem"));
if ((ip->i_flag & IN_TRUNCATED) == 0)
return;
ip->i_flag &= ~IN_TRUNCATED;
@@ -6568,10 +6772,10 @@
workitem_alloc(&jfsync->jfs_list, D_JFSYNC, UFSTOVFS(ip->i_ump));
jfsync->jfs_size = ip->i_size;
jfsync->jfs_ino = ip->i_number;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
add_to_journal(&jfsync->jfs_list);
jwait(&jfsync->jfs_list, MNT_WAIT);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -6614,20 +6818,23 @@
struct freeblks *freeblks;
struct inodedep *inodedep;
struct allocdirect *adp;
+ struct ufsmount *ump;
struct buf *bp;
struct fs *fs;
ufs2_daddr_t extblocks, datablocks;
struct mount *mp;
- int i, delay, error, dflags;
+ int i, delay, error;
ufs_lbn_t tmpval;
ufs_lbn_t lbn;
+ ump = ip->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_freeblocks called on non-softdep filesystem"));
CTR2(KTR_SUJ, "softdep_setup_freeblks: ip %d length %ld",
ip->i_number, length);
+ KASSERT(length == 0, ("softdep_setup_freeblocks: non-zero length"));
fs = ip->i_fs;
- mp = UFSTOVFS(ip->i_ump);
- if (length != 0)
- panic("softdep_setup_freeblocks: non-zero length");
freeblks = newfreeblks(mp, ip);
extblocks = 0;
datablocks = 0;
@@ -6655,9 +6862,9 @@
(void) chkdq(ip, -datablocks, NOCRED, 0);
#endif
freeblks->fb_chkcnt = -datablocks;
- UFS_LOCK(ip->i_ump);
+ UFS_LOCK(ump);
fs->fs_pendingblocks += datablocks;
- UFS_UNLOCK(ip->i_ump);
+ UFS_UNLOCK(ump);
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks);
/*
* Push the zero'ed inode to its disk buffer so that we are free
@@ -6670,7 +6877,7 @@
brelse(bp);
softdep_error("softdep_setup_freeblocks", error);
}
- if (ip->i_ump->um_fstype == UFS1) {
+ if (ump->um_fstype == UFS1) {
dp1 = ((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number));
ip->i_din1->di_freelink = dp1->di_freelink;
@@ -6684,11 +6891,8 @@
/*
* Find and eliminate any inode dependencies.
*/
- ACQUIRE_LOCK(&lk);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ ACQUIRE_LOCK(ump);
+ (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
@@ -6715,7 +6919,7 @@
if (flags & IO_NORMAL) {
merge_inode_lists(&inodedep->id_newinoupdt,
&inodedep->id_inoupdt);
- while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
+ while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
cancel_allocdirect(&inodedep->id_inoupdt, adp,
freeblks);
}
@@ -6722,14 +6926,14 @@
if (flags & IO_EXT) {
merge_inode_lists(&inodedep->id_newextupdt,
&inodedep->id_extupdt);
- while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
+ while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
cancel_allocdirect(&inodedep->id_extupdt, adp,
freeblks);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bdwrite(bp);
trunc_dependencies(ip, freeblks, -1, 0, flags);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
freeblks->fb_state |= DEPCOMPLETE;
@@ -6741,7 +6945,7 @@
freeblks->fb_state |= INPROGRESS;
else
freeblks = NULL;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (freeblks)
handle_workitem_freeblocks(freeblks, 0);
trunc_pages(ip, length, extblocks, flags);
@@ -6854,7 +7058,6 @@
struct bufobj *bo;
struct vnode *vp;
struct buf *bp;
- struct fs *fs;
int blkoff;
/*
@@ -6863,7 +7066,6 @@
* Once they are all there, walk the list and get rid of
* any dependencies.
*/
- fs = ip->i_fs;
vp = ITOV(ip);
bo = &vp->v_bufobj;
BO_LOCK(bo);
@@ -6878,7 +7080,8 @@
bp->b_vflags |= BV_SCANNED;
continue;
}
- if ((bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT)) == NULL)
+ KASSERT(bp->b_bufobj == bo, ("Wrong object in buffer"));
+ if ((bp = getdirtybuf(bp, BO_LOCKPTR(bo), MNT_WAIT)) == NULL)
goto restart;
BO_UNLOCK(bo);
if (deallocate_dependencies(bp, freeblks, blkoff))
@@ -6903,14 +7106,12 @@
}
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
- BO_MTX(bo)) == ENOLCK) {
+ BO_LOCKPTR(bo)) == ENOLCK) {
BO_LOCK(bo);
goto cleanrestart;
}
bp->b_vflags |= BV_SCANNED;
- BO_LOCK(bo);
bremfree(bp);
- BO_UNLOCK(bo);
if (blkoff != 0) {
allocbuf(bp, blkoff);
bqrelse(bp);
@@ -7015,10 +7216,13 @@
{
struct indirdep *indirdep;
struct pagedep *pagedep;
- struct allocdirect *adp;
struct worklist *wk, *wkn;
+ struct ufsmount *ump;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ goto done;
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
LIST_FOREACH_SAFE(wk, &bp->b_dep, wk_list, wkn) {
switch (wk->wk_type) {
case D_INDIRDEP:
@@ -7032,7 +7236,7 @@
case D_PAGEDEP:
pagedep = WK_PAGEDEP(wk);
if (cancel_pagedep(pagedep, freeblks, off)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (ERESTART);
}
continue;
@@ -7058,7 +7262,6 @@
break;
case D_ALLOCDIRECT:
- adp = WK_ALLOCDIRECT(wk);
if (off != 0)
continue;
/* FALLTHROUGH */
@@ -7068,7 +7271,8 @@
/* NOTREACHED */
}
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
+done:
/*
* Don't throw away this buf, we were partially truncating and
* some deps may always remain.
@@ -7223,8 +7427,10 @@
struct worklist *wk;
KASSERT(newblk->nb_jnewblk == NULL,
- ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk));
- mtx_assert(&lk, MA_OWNED);
+ ("free_newblk: jnewblk %p still attached", newblk->nb_jnewblk));
+ KASSERT(newblk->nb_list.wk_type != D_NEWBLK,
+ ("free_newblk: unclaimed newblk"));
+ LOCK_OWNED(VFSTOUFS(newblk->nb_list.wk_mp));
newblk_freefrag(newblk);
if (newblk->nb_state & ONDEPLIST)
LIST_REMOVE(newblk, nb_deps);
@@ -7238,7 +7444,6 @@
while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL)
indirdep_complete(indirdep);
handle_jwork(&newblk->nb_jwork);
- newblk->nb_list.wk_type = D_NEWBLK;
WORKITEM_FREE(newblk, D_NEWBLK);
}
@@ -7254,7 +7459,7 @@
struct diradd *dap;
struct worklist *wk;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(newdirblk->db_list.wk_mp));
WORKLIST_REMOVE(&newdirblk->db_list);
/*
* If the pagedep is still linked onto the directory buffer
@@ -7298,7 +7503,11 @@
struct inodedep *inodedep;
struct freefile *freefile;
struct freeblks *freeblks;
+ struct ufsmount *ump;
+ ump = ip->i_ump;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
+ ("softdep_freefile called on non-softdep filesystem"));
/*
* This sets up the inode de-allocation dependency.
*/
@@ -7309,9 +7518,9 @@
freefile->fx_oldinum = ino;
freefile->fx_devvp = ip->i_devvp;
LIST_INIT(&freefile->fx_jwork);
- UFS_LOCK(ip->i_ump);
+ UFS_LOCK(ump);
ip->i_fs->fs_pendinginodes += 1;
- UFS_UNLOCK(ip->i_ump);
+ UFS_UNLOCK(ump);
/*
* If the inodedep does not exist, then the zero'ed inode has
@@ -7324,7 +7533,7 @@
* Any blocks waiting on the inode to write can be safely freed
* here as it will never be written.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
if (inodedep) {
/*
@@ -7348,12 +7557,15 @@
*/
handle_bufwait(inodedep, &freefile->fx_jwork);
clear_unlinked_inodedep(inodedep);
- /* Re-acquire inodedep as we've dropped lk. */
+ /*
+ * Re-acquire inodedep as we've dropped the
+ * per-filesystem lock in clear_unlinked_inodedep().
+ */
inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
}
}
if (inodedep == NULL || check_inode_unwritten(inodedep)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
handle_workitem_freefile(freefile);
return;
}
@@ -7360,7 +7572,7 @@
if ((inodedep->id_state & DEPCOMPLETE) == 0)
inodedep->id_state |= GOINGAWAY;
WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (ip->i_number == ino)
ip->i_flag |= IN_MODIFIED;
}
@@ -7385,7 +7597,7 @@
struct inodedep *inodedep;
{
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 ||
!LIST_EMPTY(&inodedep->id_dirremhd) ||
@@ -7425,17 +7637,13 @@
return (1);
}
-/*
- * Try to free an inodedep structure. Return 1 if it could be freed.
- */
static int
-free_inodedep(inodedep)
+check_inodedep_free(inodedep)
struct inodedep *inodedep;
{
- mtx_assert(&lk, MA_OWNED);
- if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
- (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
+ LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
+ if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
!LIST_EMPTY(&inodedep->id_dirremhd) ||
!LIST_EMPTY(&inodedep->id_pendinghd) ||
!LIST_EMPTY(&inodedep->id_bufwait) ||
@@ -7450,6 +7658,21 @@
inodedep->id_nlinkdelta != 0 ||
inodedep->id_savedino1 != NULL)
return (0);
+ return (1);
+}
+
+/*
+ * Try to free an inodedep structure. Return 1 if it could be freed.
+ */
+static int
+free_inodedep(inodedep)
+ struct inodedep *inodedep;
+{
+
+ LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
+ if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
+ !check_inodedep_free(inodedep))
+ return (0);
if (inodedep->id_state & ONDEPLIST)
LIST_REMOVE(inodedep, id_deps);
LIST_REMOVE(inodedep, id_hash);
@@ -7476,7 +7699,8 @@
int bsize;
int needj;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
+ LOCK_OWNED(ump);
/*
* Handle partial truncate separately.
*/
@@ -7485,7 +7709,6 @@
return;
}
freeblks = freework->fw_freeblks;
- ump = VFSTOUFS(freeblks->fb_list.wk_mp);
fs = ump->um_fs;
needj = MOUNTEDSUJ(freeblks->fb_list.wk_mp) != 0;
bsize = lfragtosize(fs, freework->fw_frags);
@@ -7517,7 +7740,7 @@
freeblks->fb_cgwait++;
WORKLIST_INSERT(&wkhd, &freework->fw_list);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
freeblks_free(ump, freeblks, btodb(bsize));
CTR4(KTR_SUJ,
"freework_freeblock: ino %d blkno %jd lbn %jd size %ld",
@@ -7524,7 +7747,7 @@
freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize);
ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize,
freeblks->fb_inum, freeblks->fb_vtype, &wkhd);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* The jnewblk will be discarded and the bits in the map never
* made it to disk. We can immediately free the freeblk.
@@ -7580,10 +7803,10 @@
return;
}
freework->fw_state |= INPROGRESS;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
indir_trunc(freework, fsbtodb(fs, freework->fw_blkno),
freework->fw_lbn);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
@@ -7639,7 +7862,7 @@
KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd),
("handle_workitem_freeblocks: Journal entries not written."));
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) {
WORKLIST_REMOVE(wk);
switch (wk->wk_type) {
@@ -7656,11 +7879,11 @@
aip = WK_ALLOCINDIR(wk);
freework = NULL;
if (aip->ai_state & DELAYEDFREE) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
freework = newfreework(ump, freeblks, NULL,
aip->ai_lbn, aip->ai_newblkno,
ump->um_fs->fs_frag, 0, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
newblk = WK_NEWBLK(wk);
if (newblk->nb_jnewblk) {
@@ -7688,7 +7911,7 @@
wake_worklist(&freeblks->fb_list);
freeblks = NULL;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (freeblks)
return handle_complete_freeblocks(freeblks, flags);
return (0);
@@ -7775,7 +7998,7 @@
quotaadj(freeblks->fb_quota, ump, -spare);
quotarele(freeblks->fb_quota);
#endif
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (freeblks->fb_state & ONDEPLIST) {
inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum,
0, &inodedep);
@@ -7790,7 +8013,7 @@
*/
handle_jwork(&freeblks->fb_jwork);
WORKITEM_FREE(freeblks, D_FREEBLKS);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -7818,8 +8041,8 @@
struct fs *fs;
struct indirdep *indirdep;
struct ufsmount *ump;
- ufs1_daddr_t *bap1 = 0;
- ufs2_daddr_t nb, nnb, *bap2 = 0;
+ ufs1_daddr_t *bap1;
+ ufs2_daddr_t nb, nnb, *bap2;
ufs_lbn_t lbnadd, nlbn;
int i, nblocks, ufs1fmt;
int freedblocks;
@@ -7869,7 +8092,7 @@
brelse(bp);
return;
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/* Protects against a race with complete_trunc_indir(). */
freework->fw_state &= ~INPROGRESS;
/*
@@ -7886,7 +8109,7 @@
if (freework->fw_indir == NULL)
TAILQ_INSERT_TAIL(&indirdep->ir_trunc,
freework, fw_next);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
/*
@@ -7893,21 +8116,21 @@
* If we're goingaway, free the indirdep. Otherwise it will
* linger until the write completes.
*/
- if (goingaway) {
+ if (goingaway)
free_indirdep(indirdep);
- ump->um_numindirdeps -= 1;
- }
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/* Initialize pointers depending on block size. */
if (ump->um_fstype == UFS1) {
bap1 = (ufs1_daddr_t *)bp->b_data;
nb = bap1[freework->fw_off];
ufs1fmt = 1;
+ bap2 = NULL;
} else {
bap2 = (ufs2_daddr_t *)bp->b_data;
nb = bap2[freework->fw_off];
ufs1fmt = 0;
+ bap1 = NULL;
}
level = lbn_level(lbn);
needj = MOUNTEDSUJ(UFSTOVFS(ump)) != 0;
@@ -7978,7 +8201,7 @@
* indirect can be completed when its children are free.
*/
if (needj) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
freework->fw_off = i;
freework->fw_ref += freedeps;
freework->fw_ref -= NINDIR(fs) + 1;
@@ -7986,7 +8209,7 @@
freeblks->fb_cgwait += freedeps;
if (freework->fw_ref == 0)
freework_freeblock(freework);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
/*
@@ -8001,9 +8224,9 @@
/* Non SUJ softdep does single-threaded truncations. */
if (freework->fw_blkno == dbn) {
freework->fw_state |= ALLCOMPLETE;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
handle_written_freework(freework);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
return;
}
@@ -8068,7 +8291,8 @@
* Create the mkdir dependencies for . and .. in a new directory. Link them
* in to a newdirblk so any subsequent additions are tracked properly. The
* caller is responsible for adding the mkdir1 dependency to the journal
- * and updating id_mkdiradd. This function returns with lk held.
+ * and updating id_mkdiradd. This function returns with the per-filesystem
+ * lock held.
*/
static struct mkdir *
setup_newdir(dap, newinum, dinum, newdirbp, mkdirp)
@@ -8081,13 +8305,15 @@
struct newblk *newblk;
struct pagedep *pagedep;
struct inodedep *inodedep;
- struct newdirblk *newdirblk = 0;
+ struct newdirblk *newdirblk;
struct mkdir *mkdir1, *mkdir2;
struct worklist *wk;
struct jaddref *jaddref;
+ struct ufsmount *ump;
struct mount *mp;
mp = dap->da_list.wk_mp;
+ ump = VFSTOUFS(mp);
newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK,
M_SOFTDEP_FLAGS);
workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp);
@@ -8110,8 +8336,8 @@
* Dependency on "." and ".." being written to disk.
*/
mkdir1->md_buf = newdirbp;
- ACQUIRE_LOCK(&lk);
- LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
+ ACQUIRE_LOCK(VFSTOUFS(mp));
+ LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir1, md_mkdirs);
/*
* We must link the pagedep, allocdirect, and newdirblk for
* the initial file page so the pointer to the new directory
@@ -8150,7 +8376,7 @@
KASSERT(jaddref != NULL && jaddref->ja_parent == newinum &&
(jaddref->ja_state & MKDIR_PARENT),
("setup_newdir: bad dotdot jaddref %p", jaddref));
- LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
+ LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs);
mkdir2->md_jaddref = jaddref;
jaddref->ja_mkdir = mkdir2;
} else if (inodedep == NULL ||
@@ -8157,8 +8383,9 @@
(inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
dap->da_state &= ~MKDIR_PARENT;
WORKITEM_FREE(mkdir2, D_MKDIR);
+ mkdir2 = NULL;
} else {
- LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
+ LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs);
WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list);
}
*mkdirp = mkdir2;
@@ -8205,12 +8432,17 @@
struct newblk *newblk;
struct pagedep *pagedep;
struct inodedep *inodedep;
- struct newdirblk *newdirblk = 0;
+ struct newdirblk *newdirblk;
struct mkdir *mkdir1, *mkdir2;
struct jaddref *jaddref;
+ struct ufsmount *ump;
struct mount *mp;
int isindir;
+ ump = dp->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_directory_add called on non-softdep filesystem"));
/*
* Whiteouts have no dependencies.
*/
@@ -8221,7 +8453,6 @@
}
jaddref = NULL;
mkdir1 = mkdir2 = NULL;
- mp = UFSTOVFS(dp->i_ump);
fs = dp->i_fs;
lbn = lblkno(fs, diroffset);
offset = blkoff(fs, diroffset);
@@ -8233,6 +8464,7 @@
dap->da_state = ATTACHED;
LIST_INIT(&dap->da_jwork);
isindir = bp->b_lblkno >= NDADDR;
+ newdirblk = NULL;
if (isnewblk &&
(isindir ? blkoff(fs, diroffset) : fragoff(fs, diroffset)) == 0) {
newdirblk = malloc(sizeof(struct newdirblk),
@@ -8247,7 +8479,7 @@
*/
if (newdirbp == NULL) {
dap->da_state |= DEPCOMPLETE;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
} else {
dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
mkdir1 = setup_newdir(dap, newinum, dp->i_number, newdirbp,
@@ -8265,7 +8497,7 @@
dap->da_pagedep = pagedep;
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
da_pdlist);
- inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
/*
* If we're journaling, link the diradd into the jaddref so it
* may be completed after the journal entry is written. Otherwise,
@@ -8323,7 +8555,7 @@
inodedep->id_mkdiradd = dap;
} else if (inodedep->id_mkdiradd)
merge_diradd(inodedep, dap);
- if (newdirblk) {
+ if (newdirblk != NULL) {
/*
* There is nothing to do if we are already tracking
* this block.
@@ -8330,7 +8562,7 @@
*/
if ((pagedep->pd_state & NEWBLOCK) != 0) {
WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk)
@@ -8340,7 +8572,7 @@
pagedep->pd_state |= NEWBLOCK;
pagedep->pd_newdirblk = newdirblk;
newdirblk->db_pagedep = pagedep;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* If we extended into an indirect, signal direnter to sync.
*/
@@ -8348,7 +8580,7 @@
return (1);
return (0);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -8378,6 +8610,9 @@
int flags;
mp = UFSTOVFS(dp->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_change_directoryentry_offset called on "
+ "non-softdep filesystem"));
de = (struct direct *)oldloc;
jmvref = NULL;
flags = 0;
@@ -8396,7 +8631,7 @@
offset = blkoff(dp->i_fs, dp->i_offset);
oldoffset = offset + (oldloc - base);
newoffset = offset + (newloc - base);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
if (pagedep_lookup(mp, bp, dp->i_number, lbn, flags, &pagedep) == 0)
goto done;
dap = diradd_lookup(pagedep, oldoffset);
@@ -8418,7 +8653,7 @@
add_to_journal(&jmvref->jm_list);
}
bcopy(oldloc, newloc, entrysize);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -8434,6 +8669,7 @@
{
struct diradd *olddap;
struct mkdir *mkdir, *nextmd;
+ struct ufsmount *ump;
short state;
olddap = inodedep->id_mkdiradd;
@@ -8440,7 +8676,9 @@
inodedep->id_mkdiradd = newdap;
if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
newdap->da_state &= ~DEPCOMPLETE;
- for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
+ mkdir = nextmd) {
nextmd = LIST_NEXT(mkdir, md_mkdirs);
if (mkdir->md_diradd != olddap)
continue;
@@ -8499,6 +8737,7 @@
struct inodedep *inodedep;
struct jaddref *jaddref;
struct inoref *inoref;
+ struct ufsmount *ump;
struct mkdir *mkdir;
/*
@@ -8535,7 +8774,8 @@
* journaling.
*/
if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
- LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) {
+ ump = VFSTOUFS(dap->da_list.wk_mp);
+ LIST_FOREACH(mkdir, &ump->softdep_mkdirlisthd, md_mkdirs) {
if (mkdir->md_diradd != dap)
continue;
if ((jaddref = mkdir->md_jaddref) == NULL)
@@ -8580,8 +8820,10 @@
struct pagedep *pagedep;
struct inodedep *inodedep;
struct mkdir *mkdir, *nextmd;
+ struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(dap->da_list.wk_mp);
+ LOCK_OWNED(ump);
LIST_REMOVE(dap, da_pdlist);
if (dap->da_state & ONWORKLIST)
WORKLIST_REMOVE(&dap->da_list);
@@ -8600,7 +8842,8 @@
if (inodedep->id_mkdiradd == dap)
inodedep->id_mkdiradd = NULL;
if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
- for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
+ for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
+ mkdir = nextmd) {
nextmd = LIST_NEXT(mkdir, md_mkdirs);
if (mkdir->md_diradd != dap)
continue;
@@ -8655,6 +8898,8 @@
struct inodedep *inodedep;
int direct;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_setup_remove called on non-softdep filesystem"));
/*
* Allocate a new dirrem if appropriate and ACQUIRE_LOCK. We want
* newdirrem() to setup the full directory remove which requires
@@ -8686,7 +8931,7 @@
if ((dirrem->dm_state & COMPLETE) == 0) {
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
dm_next);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
} else {
if (prevdirrem != NULL)
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
@@ -8693,7 +8938,7 @@
prevdirrem, dm_next);
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
direct = LIST_EMPTY(&dirrem->dm_jremrefhd);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
if (direct)
handle_workitem_remove(dirrem, 0);
}
@@ -8765,6 +9010,7 @@
{
struct inodedep *inodedep;
struct jaddref *jaddref;
+ struct ufsmount *ump;
struct mkdir *mkdir;
struct diradd *dap;
@@ -8774,7 +9020,8 @@
dap = inodedep->id_mkdiradd;
if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0)
return (jremref);
- for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir;
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
mkdir = LIST_NEXT(mkdir, md_mkdirs))
if (mkdir->md_diradd == dap && mkdir->md_state & MKDIR_PARENT)
break;
@@ -8863,16 +9110,19 @@
panic("newdirrem: whiteout");
dvp = ITOV(dp);
/*
- * If we are over our limit, try to improve the situation.
+ * If the system is over its limit and our filesystem is
+ * responsible for more than our share of that usage and
+ * we are not a snapshot, request some inodedep cleanup.
* Limiting the number of dirrem structures will also limit
* the number of freefile and freeblks structures.
*/
- ACQUIRE_LOCK(&lk);
- if (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2)
- (void) request_cleanup(ITOV(dp)->v_mount, FLUSH_BLOCKS);
- FREE_LOCK(&lk);
- dirrem = malloc(sizeof(struct dirrem),
- M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO);
+ ACQUIRE_LOCK(ip->i_ump);
+ if (!IS_SNAPSHOT(ip) && softdep_excess_items(ip->i_ump, D_DIRREM))
+ schedule_cleanup(ITOV(dp)->v_mount);
+ else
+ FREE_LOCK(ip->i_ump);
+ dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS |
+ M_ZERO);
workitem_alloc(&dirrem->dm_list, D_DIRREM, dvp->v_mount);
LIST_INIT(&dirrem->dm_jremrefhd);
LIST_INIT(&dirrem->dm_jwork);
@@ -8900,7 +9150,7 @@
jremref = newjremref(dirrem, dp, ip, dp->i_offset,
ip->i_effnlink + 1);
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
lbn = lblkno(dp->i_fs, dp->i_offset);
offset = blkoff(dp->i_fs, dp->i_offset);
pagedep_lookup(UFSTOVFS(dp->i_ump), bp, dp->i_number, lbn, DEPALLOC,
@@ -8947,8 +9197,8 @@
if ((dap->da_state & ATTACHED) == 0)
panic("newdirrem: not ATTACHED");
if (dap->da_newinum != ip->i_number)
- panic("newdirrem: inum %d should be %d",
- ip->i_number, dap->da_newinum);
+ panic("newdirrem: inum %ju should be %ju",
+ (uintmax_t)ip->i_number, (uintmax_t)dap->da_newinum);
/*
* If we are deleting a changed name that never made it to disk,
* then return the dirrem describing the previous inode (which
@@ -9013,6 +9263,8 @@
offset = blkoff(dp->i_fs, dp->i_offset);
mp = UFSTOVFS(dp->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_directory_change called on non-softdep filesystem"));
/*
* Whiteouts do not need diradd dependencies.
@@ -9060,7 +9312,7 @@
if (LIST_EMPTY(&dirrem->dm_jremrefhd))
add_to_worklist(&dirrem->dm_list, 0);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
return;
}
/*
@@ -9106,7 +9358,7 @@
* inode is not yet written. If it is written, do the post-inode
* write processing to put it on the id_pendinghd list.
*/
- inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
if (MOUNTEDSUJ(mp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
inoreflst);
@@ -9134,7 +9386,7 @@
*/
if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET)
merge_diradd(inodedep, dap);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -9148,17 +9400,16 @@
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
- int dflags;
- ACQUIRE_LOCK(&lk);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_change_linkcnt called on non-softdep filesystem"));
+ ACQUIRE_LOCK(ip->i_ump);
+ inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC,
+ &inodedep);
if (ip->i_nlink < ip->i_effnlink)
panic("softdep_change_linkcnt: bad delta");
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -9174,8 +9425,8 @@
struct sbdep *sbdep;
struct worklist *wk;
- if (MOUNTEDSUJ(UFSTOVFS(ump)) == 0)
- return;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
+ ("softdep_setup_sbupdate called on non-softdep filesystem"));
LIST_FOREACH(wk, &bp->b_dep, wk_list)
if (wk->wk_type == D_SBDEP)
break;
@@ -9185,9 +9436,9 @@
workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump));
sbdep->sb_fs = fs;
sbdep->sb_ump = ump;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -9201,7 +9452,7 @@
struct inodedep *inodedep;
struct inodedep *idp;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ump);
for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst);
inodedep; inodedep = idp) {
if ((inodedep->id_state & UNLINKNEXT) == 0)
@@ -9247,12 +9498,10 @@
struct buf *bp;
{
struct inodedep *inodedep;
- struct mount *mp;
struct fs *fs;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(sbdep->sb_ump);
fs = sbdep->sb_fs;
- mp = UFSTOVFS(sbdep->sb_ump);
/*
* If the superblock doesn't match the in-memory list, start over.
*/
@@ -9292,10 +9541,10 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
if (MOUNTEDSUJ(mp) == 0)
return;
- ump = VFSTOUFS(mp);
ump->um_fs->fs_fmod = 1;
if (inodedep->id_state & UNLINKED)
panic("unlinked_inodedep: %p already unlinked\n", inodedep);
@@ -9326,7 +9575,7 @@
ino = inodedep->id_ino;
error = 0;
for (;;) {
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ump);
KASSERT((inodedep->id_state & UNLINKED) != 0,
("clear_unlinked_inodedep: inodedep %p not unlinked",
inodedep));
@@ -9364,15 +9613,18 @@
pino = 0;
if (idp && (idp->id_state & UNLINKNEXT))
pino = idp->id_ino;
- FREE_LOCK(&lk);
- if (pino == 0)
+ FREE_LOCK(ump);
+ if (pino == 0) {
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
(int)fs->fs_sbsize, 0, 0, 0);
- else
+ } else {
error = bread(ump->um_devvp,
fsbtodb(fs, ino_to_fsba(fs, pino)),
(int)fs->fs_bsize, NOCRED, &bp);
- ACQUIRE_LOCK(&lk);
+ if (error)
+ brelse(bp);
+ }
+ ACQUIRE_LOCK(ump);
if (error)
break;
/* If the list has changed restart the loop. */
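(The added brelse() fixes a buffer leak: on 10-stable bread() still
returns with the buffer held even on failure, so the caller owns the
release. The idiomatic shape, sketched with the same call:

    error = bread(ump->um_devvp,
        fsbtodb(fs, ino_to_fsba(fs, pino)),
        (int)fs->fs_bsize, NOCRED, &bp);
    if (error) {
        brelse(bp);     /* bp is still held on failure */
        break;          /* or otherwise back out */
    }
)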
@@ -9382,9 +9634,9 @@
nino = idp->id_ino;
if (nino != pino ||
(inodedep->id_state & UNLINKPREV) != UNLINKPREV) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
brelse(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
}
nino = 0;
@@ -9400,7 +9652,7 @@
inodedep));
inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST);
TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* The predecessor's next pointer is manually updated here
* so that the NEXT flag is never cleared for an element
@@ -9422,13 +9674,13 @@
* filesystem is corrupted already.
*/
bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If the superblock pointer still needs to be cleared, force
* a write here.
*/
if (fs->fs_sujfree == ino) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
(int)fs->fs_sbsize, 0, 0, 0);
bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
@@ -9436,7 +9688,7 @@
softdep_setup_sbupdate(ump, (struct fs *)bp->b_data,
bp);
bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
if (fs->fs_sujfree != ino)
@@ -9478,7 +9730,7 @@
if (ffs_vgetf(mp, oldinum, flags, &vp, FFSV_FORCEINSMQ) != 0)
return (EBUSY);
ip = VTOI(vp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0)
panic("handle_workitem_remove: lost inodedep");
if (dirrem->dm_state & ONDEPLIST)
@@ -9520,7 +9772,7 @@
("handle_workitem_remove: worklist not empty. %s",
TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type)));
WORKITEM_FREE(dirrem, D_DIRREM);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
goto out;
}
/*
@@ -9546,7 +9798,7 @@
KASSERT(LIST_EMPTY(&dirrem->dm_jwork),
("handle_workitem_remove: DIRCHG and worklist not empty."));
WORKITEM_FREE(dirrem, D_DIRREM);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
goto out;
}
dirrem->dm_state = ONDEPLIST;
@@ -9567,12 +9819,12 @@
if (inodedep == NULL ||
(inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED ||
check_inode_unwritten(inodedep)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
vput(vp);
return handle_workitem_remove(dirrem, flags);
}
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
ip->i_flag |= IN_CHANGE;
out:
ffs_update(vp, 0);
@@ -9607,9 +9859,9 @@
ump = VFSTOUFS(freefile->fx_list.wk_mp);
fs = ump->um_fs;
#ifdef DEBUG
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (error)
panic("handle_workitem_freefile: inodedep %p survived", idp);
#endif
@@ -9621,9 +9873,9 @@
if ((error = ffs_freefile(ump, fs, freefile->fx_devvp,
freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0)
softdep_error("handle_workitem_freefile", error);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKITEM_FREE(freefile, D_FREEFILE);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
@@ -9673,6 +9925,7 @@
struct freeblks *freeblks;
struct jblkdep *jblkdep;
struct newblk *newblk;
+ struct ufsmount *ump;
/*
* We only care about write operations. There should never
@@ -9685,10 +9938,13 @@
panic("softdep_disk_io_initiation: Writing buffer with "
"background write in progress: %p", bp);
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ return;
+ ump = VFSTOUFS(wk->wk_mp);
+
marker.wk_type = D_LAST + 1; /* Not a normal workitem */
PHOLD(curproc); /* Don't swap out kernel stack */
-
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* Do any necessary pre-I/O processing.
*/
@@ -9772,7 +10028,7 @@
/* NOTREACHED */
}
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
PRELE(curproc); /* Allow swapout of kernel stack */
}
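(With no global lock left, I/O initiation must first discover which
filesystem's lock covers this buffer. The first entry on b_dep
supplies it, since every dependency attached to one buffer belongs to
the same mount, and a buffer with no dependencies can return before
taking any lock at all; deallocate_dependencies() above grew the same
early exit. The recurring shape:

    if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
        return;                 /* no softdep work on this buf */
    ump = VFSTOUFS(wk->wk_mp);  /* any entry yields the same mount */
    ACQUIRE_LOCK(ump);
)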
@@ -9808,8 +10064,8 @@
* Wait for all journal remove dependencies to hit the disk.
* We can not allow any potentially conflicting directory adds
* to be visible before removes and rollback is too difficult.
- * lk may be dropped and re-acquired, however we hold the buf
- * locked so the dependency can not go away.
+ * The per-filesystem lock may be dropped and re-acquired, however
+ * we hold the buf locked so the dependency can not go away.
*/
LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next)
while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL)
@@ -9821,9 +10077,10 @@
ep = (struct direct *)
((char *)bp->b_data + dap->da_offset);
if (ep->d_ino != dap->da_newinum)
- panic("%s: dir inum %d != new %d",
+ panic("%s: dir inum %ju != new %ju",
"initiate_write_filepage",
- ep->d_ino, dap->da_newinum);
+ (uintmax_t)ep->d_ino,
+ (uintmax_t)dap->da_newinum);
if (dap->da_state & DIRCHG)
ep->d_ino = dap->da_previous->dm_oldinum;
else
@@ -9853,6 +10110,7 @@
struct ufs1_dinode *dp;
struct ufs1_dinode *sip;
struct inoref *inoref;
+ struct ufsmount *ump;
struct fs *fs;
ufs_lbn_t i;
#ifdef INVARIANTS
@@ -9864,6 +10122,8 @@
panic("initiate_write_inodeblock_ufs1: already started");
inodedep->id_state |= IOSTARTED;
fs = inodedep->id_fs;
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ LOCK_OWNED(ump);
dp = (struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, inodedep->id_ino);
@@ -9884,10 +10144,10 @@
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
if (inodedep->id_savedino1 != NULL)
panic("initiate_write_inodeblock_ufs1: I/O underway");
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
sip = malloc(sizeof(struct ufs1_dinode),
M_SAVEDINO, M_SOFTDEP_FLAGS);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
inodedep->id_savedino1 = sip;
*inodedep->id_savedino1 = *dp;
bzero((caddr_t)dp, sizeof(struct ufs1_dinode));
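(The unlock/alloc/relock dance around the M_SAVEDINO allocation is the
standard softdep pattern: M_SOFTDEP_FLAGS includes M_WAITOK, so
malloc() may sleep and the per-filesystem lock cannot be held across
it. Written as a hypothetical helper, not present in the source:

    static void *
    softdep_malloc_unlocked(struct ufsmount *ump, size_t size,
        struct malloc_type *type)
    {
        void *p;

        FREE_LOCK(ump);
        p = malloc(size, type, M_SOFTDEP_FLAGS);
        ACQUIRE_LOCK(ump);
        return (p);     /* caller must revalidate prior checks */
    }
)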
@@ -10020,6 +10280,7 @@
struct ufs2_dinode *dp;
struct ufs2_dinode *sip;
struct inoref *inoref;
+ struct ufsmount *ump;
struct fs *fs;
ufs_lbn_t i;
#ifdef INVARIANTS
@@ -10031,6 +10292,8 @@
panic("initiate_write_inodeblock_ufs2: already started");
inodedep->id_state |= IOSTARTED;
fs = inodedep->id_fs;
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ LOCK_OWNED(ump);
dp = (struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, inodedep->id_ino);
@@ -10051,10 +10314,10 @@
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
if (inodedep->id_savedino2 != NULL)
panic("initiate_write_inodeblock_ufs2: I/O underway");
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
sip = malloc(sizeof(struct ufs2_dinode),
M_SAVEDINO, M_SOFTDEP_FLAGS);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
inodedep->id_savedino2 = sip;
*inodedep->id_savedino2 = *dp;
bzero((caddr_t)dp, sizeof(struct ufs2_dinode));
@@ -10265,18 +10528,17 @@
LIST_REMOVE(indirdep, ir_next);
}
indirdep->ir_state |= GOINGAWAY;
- VFSTOUFS(indirdep->ir_list.wk_mp)->um_numindirdeps += 1;
/*
* Pass in bp for blocks still have journal writes
* pending so we can cancel them on their own.
*/
- while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != NULL)
cancel_allocindir(aip, bp, freeblks, 0);
- while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL)
cancel_allocindir(aip, NULL, freeblks, 0);
- while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL)
cancel_allocindir(aip, NULL, freeblks, 0);
- while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL)
cancel_allocindir(aip, NULL, freeblks, 0);
/*
* If there are pending partial truncations we need to keep the
@@ -10331,6 +10593,7 @@
struct indirdep *indirdep;
struct buf *bp;
{
+ struct ufsmount *ump;
indirdep->ir_state |= IOSTARTED;
if (indirdep->ir_state & GOINGAWAY)
@@ -10346,10 +10609,12 @@
* Replace up-to-date version with safe version.
*/
if (indirdep->ir_saveddata == NULL) {
- FREE_LOCK(&lk);
+ ump = VFSTOUFS(indirdep->ir_list.wk_mp);
+ LOCK_OWNED(ump);
+ FREE_LOCK(ump);
indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP,
M_SOFTDEP_FLAGS);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
indirdep->ir_state &= ~ATTACHED;
indirdep->ir_state |= UNDONE;
@@ -10371,19 +10636,24 @@
{
struct worklist *wk, *wkn;
struct inodedep *inodedep;
+ struct ufsmount *ump;
uint8_t *inosused;
struct cg *cgp;
struct fs *fs;
- ACQUIRE_LOCK(&lk);
- fs = VFSTOUFS(mp)->um_fs;
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_inofree called on non-softdep filesystem"));
+ ump = VFSTOUFS(mp);
+ ACQUIRE_LOCK(ump);
+ fs = ump->um_fs;
cgp = (struct cg *)bp->b_data;
inosused = cg_inosused(cgp);
if (isset(inosused, ino % fs->fs_ipg))
- panic("softdep_setup_inofree: inode %d not freed.", ino);
+ panic("softdep_setup_inofree: inode %ju not freed.",
+ (uintmax_t)ino);
if (inodedep_lookup(mp, ino, 0, &inodedep))
- panic("softdep_setup_inofree: ino %d has existing inodedep %p",
- ino, inodedep);
+ panic("softdep_setup_inofree: ino %ju has existing inodedep %p",
+ (uintmax_t)ino, inodedep);
if (wkhd) {
LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) {
if (wk->wk_type != D_JADDREF)
@@ -10393,13 +10663,13 @@
* We can free immediately even if the jaddref
* isn't attached in a background write as now
* the bitmaps are reconciled.
- */
+ */
wk->wk_state |= COMPLETE | ATTACHED;
free_jaddref(WK_JADDREF(wk));
}
jwork_move(&bp->b_dep, wkhd);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
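
The KASSERT added above recurs across the exported entry points in this revision; where a caller can legitimately arrive on a non-softdep mount, the function instead degrades to an early return. Both guard shapes, condensed as a sketch rather than quoted from any one hunk:

	/* Internal interface: a non-softdep caller indicates a bug. */
	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
	    ("softdep routine called on non-softdep filesystem"));

	/* Public interface: quietly do nothing without soft updates. */
	if (MOUNTEDSOFTDEP(mp) == 0)
		return (0);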
@@ -10419,6 +10689,7 @@
{
struct bmsafemap *bmsafemap;
struct jnewblk *jnewblk;
+ struct ufsmount *ump;
struct worklist *wk;
struct fs *fs;
#ifdef SUJ_DEBUG
@@ -10435,9 +10706,12 @@
"softdep_setup_blkfree: blkno %jd frags %d wk head %p",
blkno, frags, wkhd);
- ACQUIRE_LOCK(&lk);
+ ump = VFSTOUFS(mp);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
+ ("softdep_setup_blkfree called on non-softdep filesystem"));
+ ACQUIRE_LOCK(ump);
/* Lookup the bmsafemap so we track when it is dirty. */
- fs = VFSTOUFS(mp)->um_fs;
+ fs = ump->um_fs;
bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL);
/*
* Detach any jnewblks which have been canceled. They must linger
@@ -10511,7 +10785,7 @@
}
}
#endif
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -10518,7 +10792,7 @@
* Revert a block allocation when the journal record that describes it
* is not yet written.
*/
-int
+static int
jnewblk_rollback(jnewblk, fs, cgp, blksfree)
struct jnewblk *jnewblk;
struct fs *fs;
@@ -10563,7 +10837,7 @@
/* Add back in counts associated with the new frags */
blk = blkmap(fs, blksfree, bbase);
ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
- /* If a complete block has been reassembled, account for it. */
+ /* If a complete block has been reassembled, account for it. */
fragno = fragstoblks(fs, bbase);
if (ffs_isblock(fs, blksfree, fragno)) {
cgp->cg_cs.cs_nffree -= fs->fs_frag;
@@ -10578,7 +10852,7 @@
return (frags);
}
-static void
+static void
initiate_write_bmsafemap(bmsafemap, bp)
struct bmsafemap *bmsafemap;
struct buf *bp; /* The cg block. */
@@ -10591,6 +10865,10 @@
struct fs *fs;
ino_t ino;
+ /*
+ * If this is a background write, we did this at the time that
+ * the copy was made, so we do not need to do it again.
+ */
if (bmsafemap->sm_state & IOSTARTED)
return;
bmsafemap->sm_state |= IOSTARTED;
@@ -10612,8 +10890,8 @@
jaddref->ja_state |= UNDONE;
stat_jaddref++;
} else
- panic("initiate_write_bmsafemap: inode %d "
- "marked free", jaddref->ja_ino);
+ panic("initiate_write_bmsafemap: inode %ju "
+ "marked free", (uintmax_t)jaddref->ja_ino);
}
}
/*
@@ -10657,6 +10935,7 @@
{
struct worklist *wk;
struct worklist *owk;
+ struct ufsmount *ump;
struct workhead reattach;
struct freeblks *freeblks;
struct buf *sbp;
@@ -10663,10 +10942,42 @@
/*
* If an error occurred while doing the write, then the data
- * has not hit the disk and the dependencies cannot be unrolled.
+ * has not hit the disk and the dependencies cannot be processed.
+ * But we do have to go through and roll forward any dependencies
+ * that were rolled back before the disk write.
*/
- if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0)
+ if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) {
+ LIST_FOREACH(wk, &bp->b_dep, wk_list) {
+ switch (wk->wk_type) {
+
+ case D_PAGEDEP:
+ handle_written_filepage(WK_PAGEDEP(wk), bp, 0);
+ continue;
+
+ case D_INODEDEP:
+ handle_written_inodeblock(WK_INODEDEP(wk),
+ bp, 0);
+ continue;
+
+ case D_BMSAFEMAP:
+ handle_written_bmsafemap(WK_BMSAFEMAP(wk),
+ bp, 0);
+ continue;
+
+ case D_INDIRDEP:
+ handle_written_indirdep(WK_INDIRDEP(wk),
+ bp, &sbp, 0);
+ continue;
+ default:
+ /* nothing to roll forward */
+ continue;
+ }
+ }
return;
+ }
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ return;
+ ump = VFSTOUFS(wk->wk_mp);
LIST_INIT(&reattach);
/*
* This lock must not be released anywhere in this code segment.
@@ -10673,10 +10984,10 @@
*/
sbp = NULL;
owk = NULL;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
WORKLIST_REMOVE(wk);
- dep_write[wk->wk_type]++;
+ atomic_add_long(&dep_write[wk->wk_type], 1);
if (wk == owk)
panic("duplicate worklist: %p\n", wk);
owk = wk;
@@ -10683,17 +10994,20 @@
switch (wk->wk_type) {
case D_PAGEDEP:
- if (handle_written_filepage(WK_PAGEDEP(wk), bp))
+ if (handle_written_filepage(WK_PAGEDEP(wk), bp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
case D_INODEDEP:
- if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
+ if (handle_written_inodeblock(WK_INODEDEP(wk), bp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
case D_BMSAFEMAP:
- if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp))
+ if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
@@ -10712,7 +11026,8 @@
continue;
case D_INDIRDEP:
- if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp))
+ if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
@@ -10758,7 +11073,7 @@
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(&bp->b_dep, wk);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (sbp)
brelse(sbp);
}
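
softdep_disk_write_complete now runs even for failed writes: the error path rolls forward the four buffer-resident dependency types with WRITESUCCEEDED clear, so the buffer can later be rewritten from consistent state. The handler contract, as a sketch with a hypothetical handler and dependency type:

	static int
	example_handle_written(struct example_dep *dep, struct buf *bp, int flags)
	{
		/* Always roll forward whatever was rolled back for the I/O. */
		if ((flags & WRITESUCCEEDED) == 0) {
			bdirty(bp);	/* failed write: keep buffer dirty, retry */
			return (1);	/* non-zero: leave dependency attached */
		}
		/* Only a successful write may complete and free dependencies. */
		return (0);
	}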
@@ -11012,12 +11327,17 @@
* Called from within softdep_disk_write_complete above to restore
* in-memory inode block contents to their most up-to-date state. Note
* that this routine is always called from interrupt level with further
- * splbio interrupts blocked.
+ * interrupts from this device blocked.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
*/
static int
-handle_written_inodeblock(inodedep, bp)
+handle_written_inodeblock(inodedep, bp, flags)
struct inodedep *inodedep;
struct buf *bp; /* buffer containing the inode block */
+ int flags;
{
struct freefile *freefile;
struct allocdirect *adp, *nextadp;
@@ -11047,7 +11367,8 @@
/*
* Leave this inodeblock dirty until it's in the list.
*/
- if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED) {
+ if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED &&
+ (flags & WRITESUCCEEDED)) {
struct inodedep *inon;
inon = TAILQ_NEXT(inodedep, id_unlinked);
@@ -11086,7 +11407,8 @@
goto bufwait;
return (1);
}
- inodedep->id_state |= COMPLETE;
+ if (flags & WRITESUCCEEDED)
+ inodedep->id_state |= COMPLETE;
/*
* Roll forward anything that had to be rolled back before
* the inode could be updated.
@@ -11201,6 +11523,13 @@
bdirty(bp);
bufwait:
/*
+ * If the write did not succeed, we have done all the roll-forward
+ * operations, but we cannot take the actions that will allow its
+ * dependencies to be processed.
+ */
+ if ((flags & WRITESUCCEEDED) == 0)
+ return (hadchanges);
+ /*
* Process any allocdirects that completed during the update.
*/
if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
@@ -11257,11 +11586,20 @@
return (hadchanges);
}
+/*
+ * Perform needed roll-forwards and kick off any dependencies that
+ * can now be processed.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
+ */
static int
-handle_written_indirdep(indirdep, bp, bpp)
+handle_written_indirdep(indirdep, bp, bpp, flags)
struct indirdep *indirdep;
struct buf *bp;
struct buf **bpp;
+ int flags;
{
struct allocindir *aip;
struct buf *sbp;
@@ -11286,11 +11624,21 @@
indirdep->ir_state &= ~(UNDONE | IOSTARTED);
indirdep->ir_state |= ATTACHED;
/*
+ * If the write did not succeed, we have done all the roll-forward
+ * operations, but we cannot take the actions that will allow its
+ * dependencies to be processed.
+ */
+ if ((flags & WRITESUCCEEDED) == 0) {
+ stat_indir_blk_ptrs++;
+ bdirty(bp);
+ return (1);
+ }
+ /*
* Move allocindirs with written pointers to the completehd if
* the indirdep's pointer is not yet written. Otherwise
* free them here.
*/
- while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) {
+ while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL) {
LIST_REMOVE(aip, ai_next);
if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
LIST_INSERT_HEAD(&indirdep->ir_completehd, aip,
@@ -11305,7 +11653,7 @@
* the done list to the write list after updating the pointers.
*/
if (TAILQ_EMPTY(&indirdep->ir_trunc)) {
- while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
+ while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL) {
handle_allocindir_partdone(aip);
if (aip == LIST_FIRST(&indirdep->ir_donehd))
panic("disk_write_complete: not gone");
@@ -11358,8 +11706,8 @@
}
/*
- * Returns true if the bmsafemap will have rollbacks when written. Must
- * only be called with lk and the buf lock on the cg held.
+ * Returns true if the bmsafemap will have rollbacks when written. Must only
+ * be called with the per-filesystem lock and the buf lock on the cg held.
*/
static int
bmsafemap_backgroundwrite(bmsafemap, bp)
@@ -11368,6 +11716,7 @@
{
int dirty;
+ LOCK_OWNED(VFSTOUFS(bmsafemap->sm_list.wk_mp));
dirty = !LIST_EMPTY(&bmsafemap->sm_jaddrefhd) |
!LIST_EMPTY(&bmsafemap->sm_jnewblkhd);
/*
@@ -11438,11 +11787,16 @@
* Complete a write to a bmsafemap structure. Roll forward any bitmap
* changes if it's not a background write. Set all written dependencies
* to DEPCOMPLETE and free the structure if possible.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
*/
static int
-handle_written_bmsafemap(bmsafemap, bp)
+handle_written_bmsafemap(bmsafemap, bp, flags)
struct bmsafemap *bmsafemap;
struct buf *bp;
+ int flags;
{
struct newblk *newblk;
struct inodedep *inodedep;
@@ -11454,17 +11808,24 @@
struct cg *cgp;
struct fs *fs;
ino_t ino;
+ int foreground;
int chgs;
if ((bmsafemap->sm_state & IOSTARTED) == 0)
- panic("initiate_write_bmsafemap: Not started\n");
+ panic("handle_written_bmsafemap: Not started\n");
ump = VFSTOUFS(bmsafemap->sm_list.wk_mp);
chgs = 0;
bmsafemap->sm_state &= ~IOSTARTED;
+ foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0;
/*
- * Release journal work that was waiting on the write.
+ * If write was successful, release journal work that was waiting
+ * on the write. Otherwise move the work back.
*/
- handle_jwork(&bmsafemap->sm_freewr);
+ if (flags & WRITESUCCEEDED)
+ handle_jwork(&bmsafemap->sm_freewr);
+ else
+ LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
+ worklist, wk_list);
/*
* Restore unwritten inode allocation pending jaddref writes.
@@ -11481,7 +11842,8 @@
if (isset(inosused, ino))
panic("handle_written_bmsafemap: "
"re-allocated inode");
- if ((bp->b_xflags & BX_BKGRDMARKER) == 0) {
+ /* Do the roll-forward only if it's a real copy. */
+ if (foreground) {
if ((jaddref->ja_mode & IFMT) == IFDIR)
cgp->cg_cs.cs_ndir++;
cgp->cg_cs.cs_nifree--;
@@ -11504,7 +11866,8 @@
jntmp) {
if ((jnewblk->jn_state & UNDONE) == 0)
continue;
- if ((bp->b_xflags & BX_BKGRDMARKER) == 0 &&
+ /* Do the roll-forward only if it's a real copy. */
+ if (foreground &&
jnewblk_rollforward(jnewblk, fs, cgp, blksfree))
chgs = 1;
jnewblk->jn_state &= ~(UNDONE | NEWBLOCK);
@@ -11512,6 +11875,20 @@
free_jnewblk(jnewblk);
}
}
+ /*
+ * If the write did not succeed, we have done all the roll-forward
+ * operations, but we cannot take the actions that will allow its
+ * dependencies to be processed.
+ */
+ if ((flags & WRITESUCCEEDED) == 0) {
+ LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr,
+ newblk, nb_deps);
+ LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
+ worklist, wk_list);
+ if (foreground)
+ bdirty(bp);
+ return (1);
+ }
while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) {
newblk->nb_state |= DEPCOMPLETE;
newblk->nb_state &= ~ONDEPLIST;
@@ -11544,7 +11921,8 @@
return (0);
}
LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next);
- bdirty(bp);
+ if (foreground)
+ bdirty(bp);
return (1);
}
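
The foreground test above captures the background-write subtlety: a cg buffer written by the background mechanism is a throwaway copy (BX_BKGRDMARKER set), so rolling live bitmap bits forward and any bdirty() must apply only to the real buffer. In brief:

	foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0;
	/* roll bitmap bits forward only if this is the real cg buffer */
	if (foreground)
		bdirty(bp);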
@@ -11614,12 +11992,17 @@
* A write operation was just completed. Removed inodes can
* now be freed and associated block pointers may be committed.
* Note that this routine is always called from interrupt level
- * with further splbio interrupts blocked.
+ * with further interrupts from this device blocked.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
*/
static int
-handle_written_filepage(pagedep, bp)
+handle_written_filepage(pagedep, bp, flags)
struct pagedep *pagedep;
struct buf *bp; /* buffer containing the written page */
+ int flags;
{
struct dirrem *dirrem;
struct diradd *dap, *nextdap;
@@ -11629,6 +12012,8 @@
if ((pagedep->pd_state & IOSTARTED) == 0)
panic("handle_written_filepage: not started");
pagedep->pd_state &= ~IOSTARTED;
+ if ((flags & WRITESUCCEEDED) == 0)
+ goto rollforward;
/*
* Process any directory removals that have been committed.
*/
@@ -11648,6 +12033,7 @@
if ((pagedep->pd_state & NEWBLOCK) == 0)
while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
free_diradd(dap, NULL);
+rollforward:
/*
* Uncommitted directory entries must be restored.
*/
@@ -11680,7 +12066,7 @@
* marked dirty so that it will eventually get written back in
* its correct form.
*/
- if (chgs) {
+ if (chgs || (flags & WRITESUCCEEDED) == 0) {
if ((bp->b_flags & B_DELWRI) == 0)
stat_dir_entry++;
bdirty(bp);
@@ -11719,18 +12105,20 @@
{
struct inodedep *inodedep;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_load_inodeblock called on non-softdep filesystem"));
/*
* Check for alternate nlink count.
*/
ip->i_effnlink = ip->i_nlink;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0,
&inodedep) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return;
}
ip->i_effnlink -= inodedep->id_nlinkdelta;
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
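
softdep_load_inodeblock is where the effective link count diverges from the on-disk count while removals are still pending. A worked example with assumed numbers: the disk inode records nlink == 3, but two unlinks are still queued in the inodedep (id_nlinkdelta == 2):

	ip->i_effnlink = ip->i_nlink;			/* 3, as read from disk */
	ip->i_effnlink -= inodedep->id_nlinkdelta;	/* 3 - 2 = 1 live name */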
/*
@@ -11751,6 +12139,7 @@
{
struct inodedep *inodedep;
struct inoref *inoref;
+ struct ufsmount *ump;
struct worklist *wk;
struct mount *mp;
struct buf *ibp;
@@ -11757,7 +12146,10 @@
struct fs *fs;
int error;
- mp = UFSTOVFS(ip->i_ump);
+ ump = ip->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_update_inodeblock called on non-softdep filesystem"));
fs = ip->i_fs;
/*
* Preserve the freelink that is on disk. clear_unlinked_inodedep()
@@ -11777,10 +12169,10 @@
* if there is no existing inodedep, then there are no dependencies
* to track.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
again:
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (ip->i_effnlink != ip->i_nlink)
panic("softdep_update_inodeblock: bad link count");
return;
@@ -11839,16 +12231,16 @@
* to be written so that the update can be done.
*/
if (waitfor == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
retry:
if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) != 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
ibp = inodedep->id_bmsafemap->sm_buf;
- ibp = getdirtybuf(ibp, &lk, MNT_WAIT);
+ ibp = getdirtybuf(ibp, LOCK_PTR(ump), MNT_WAIT);
if (ibp == NULL) {
/*
* If ibp came back as NULL, the dependency could have been
@@ -11857,10 +12249,10 @@
*/
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
goto retry;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(ibp)) != 0)
softdep_error("softdep_update_inodeblock: bwrite", error);
}
@@ -11909,6 +12301,7 @@
struct inodedep *inodedep;
struct pagedep *pagedep;
struct inoref *inoref;
+ struct ufsmount *ump;
struct worklist *wk;
struct diradd *dap;
struct mount *mp;
@@ -11923,11 +12316,14 @@
ip = VTOI(vp);
fs = ip->i_fs;
+ ump = ip->i_ump;
mp = vp->v_mount;
- ACQUIRE_LOCK(&lk);
+ if (MOUNTEDSOFTDEP(mp) == 0)
+ return (0);
+ ACQUIRE_LOCK(ump);
restart:
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
@@ -11985,7 +12381,7 @@
* the lock on our parent. See the comment in ufs_lookup
* for details on possible races.
*/
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp,
FFSV_FORCEINSMQ)) {
error = vfs_busy(mp, MBF_NOWAIT);
@@ -12030,7 +12426,7 @@
vput(pvp);
return (error);
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
locked = 1;
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) {
if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) != NULL) {
@@ -12043,7 +12439,7 @@
else
pagedep = dap->da_pagedep;
pagedep_new_block = pagedep->pd_state & NEWBLOCK;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
locked = 0;
if (pagedep_new_block && (error =
ffs_syncvnode(pvp, MNT_WAIT, 0))) {
@@ -12053,7 +12449,7 @@
}
}
if (locked)
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
* Flush directory page containing the inode's name.
@@ -12067,11 +12463,11 @@
vput(pvp);
if (error != 0)
return (error);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)
break;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -12095,7 +12491,6 @@
bo = &vp->v_bufobj;
restart:
BO_LOCK(bo);
- ACQUIRE_LOCK(&lk);
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
/*
* If it is already scheduled, skip to the next buffer.
@@ -12115,13 +12510,11 @@
BUF_UNLOCK(bp);
continue;
}
- FREE_LOCK(&lk);
BO_UNLOCK(bo);
bremfree(bp);
(void) bawrite(bp);
goto restart;
}
- FREE_LOCK(&lk);
drain_output(vp);
BO_UNLOCK(bo);
}
@@ -12128,7 +12521,7 @@
/*
* Sync all cylinder groups that were dirty at the time this function is
- * called. Newly dirtied cgs will be inserted before the sintenel. This
+ * called. Newly dirtied cgs will be inserted before the sentinel. This
* is used to flush freedep activity that may be holding up writes to an
* indirect block.
*/
@@ -12138,25 +12531,25 @@
int waitfor;
{
struct bmsafemap *bmsafemap;
- struct bmsafemap *sintenel;
+ struct bmsafemap *sentinel;
struct ufsmount *ump;
struct buf *bp;
int error;
- sintenel = malloc(sizeof(*sintenel), M_BMSAFEMAP, M_ZERO | M_WAITOK);
- sintenel->sm_cg = -1;
+ sentinel = malloc(sizeof(*sentinel), M_BMSAFEMAP, M_ZERO | M_WAITOK);
+ sentinel->sm_cg = -1;
ump = VFSTOUFS(mp);
error = 0;
- ACQUIRE_LOCK(&lk);
- LIST_INSERT_HEAD(&ump->softdep_dirtycg, sintenel, sm_next);
- for (bmsafemap = LIST_NEXT(sintenel, sm_next); bmsafemap != NULL;
- bmsafemap = LIST_NEXT(sintenel, sm_next)) {
- /* Skip sintenels and cgs with no work to release. */
+ ACQUIRE_LOCK(ump);
+ LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinel, sm_next);
+ for (bmsafemap = LIST_NEXT(sentinel, sm_next); bmsafemap != NULL;
+ bmsafemap = LIST_NEXT(sentinel, sm_next)) {
+ /* Skip sentinels and cgs with no work to release. */
if (bmsafemap->sm_cg == -1 ||
(LIST_EMPTY(&bmsafemap->sm_freehd) &&
LIST_EMPTY(&bmsafemap->sm_freewr))) {
- LIST_REMOVE(sintenel, sm_next);
- LIST_INSERT_AFTER(bmsafemap, sintenel, sm_next);
+ LIST_REMOVE(sentinel, sm_next);
+ LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
continue;
}
/*
@@ -12163,25 +12556,25 @@
* If we don't get the lock and we're waiting try again, if
* not move on to the next buf and try to sync it.
*/
- bp = getdirtybuf(bmsafemap->sm_buf, &lk, waitfor);
+ bp = getdirtybuf(bmsafemap->sm_buf, LOCK_PTR(ump), waitfor);
if (bp == NULL && waitfor == MNT_WAIT)
continue;
- LIST_REMOVE(sintenel, sm_next);
- LIST_INSERT_AFTER(bmsafemap, sintenel, sm_next);
+ LIST_REMOVE(sentinel, sm_next);
+ LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
if (bp == NULL)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (waitfor == MNT_NOWAIT)
bawrite(bp);
else
error = bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (error)
break;
}
- LIST_REMOVE(sintenel, sm_next);
- FREE_LOCK(&lk);
- free(sintenel, M_BMSAFEMAP);
+ LIST_REMOVE(sentinel, sm_next);
+ FREE_LOCK(ump);
+ free(sentinel, M_BMSAFEMAP);
return (error);
}
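
The sentinel technique above is worth calling out: because the per-filesystem lock is dropped around each bwrite(), the dirty-cg list can change underfoot, so a dummy element (sm_cg == -1) is walked down the list and iteration always resumes at LIST_NEXT(sentinel). The skeleton of the idiom, simplified from the hunk:

	LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinel, sm_next);
	while ((bmsafemap = LIST_NEXT(sentinel, sm_next)) != NULL) {
		/* advance the sentinel past the element before sleeping */
		LIST_REMOVE(sentinel, sm_next);
		LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
		/* drop lock, write bmsafemap->sm_buf, re-acquire lock */
	}
	LIST_REMOVE(sentinel, sm_next);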
@@ -12193,25 +12586,29 @@
int
softdep_sync_metadata(struct vnode *vp)
{
+ struct inode *ip;
int error;
+ ip = VTOI(vp);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_sync_metadata called on non-softdep filesystem"));
/*
* Ensure that any direct block dependencies have been cleared,
* truncations are started, and inode references are journaled.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
/*
* Write all journal records to prevent rollbacks on devvp.
*/
if (vp->v_type == VCHR)
softdep_flushjournal(vp->v_mount);
- error = flush_inodedep_deps(vp, vp->v_mount, VTOI(vp)->i_number);
+ error = flush_inodedep_deps(vp, vp->v_mount, ip->i_number);
/*
* Ensure that all truncates are written so we won't find deps on
* indirect blocks.
*/
process_truncates(vp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return (error);
}
@@ -12230,10 +12627,13 @@
struct pagedep *pagedep;
struct allocindir *aip;
struct newblk *newblk;
+ struct ufsmount *ump;
struct buf *nbp;
struct worklist *wk;
int i, error;
+ KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
+ ("softdep_sync_buf called on non-softdep filesystem"));
/*
* For VCHR we just don't want to force flush any dependencies that
* will cause rollbacks.
@@ -12243,7 +12643,8 @@
return (EBUSY);
return (0);
}
- ACQUIRE_LOCK(&lk);
+ ump = VTOI(vp)->i_ump;
+ ACQUIRE_LOCK(ump);
/*
* As we hold the buffer locked, none of its dependencies
* will disappear.
@@ -12268,13 +12669,13 @@
waitfor == MNT_NOWAIT)
continue;
nbp = newblk->nb_bmsafemap->sm_buf;
- nbp = getdirtybuf(nbp, &lk, waitfor);
+ nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor);
if (nbp == NULL)
goto top;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(nbp)) != 0)
goto out;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
case D_INDIRDEP:
@@ -12299,13 +12700,13 @@
if (newblk->nb_state & DEPCOMPLETE)
continue;
nbp = newblk->nb_bmsafemap->sm_buf;
- nbp = getdirtybuf(nbp, &lk, waitfor);
+ nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor);
if (nbp == NULL)
goto restart;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(nbp)) != 0)
goto out;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
goto restart;
}
continue;
@@ -12356,7 +12757,7 @@
}
}
out_unlock:
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
out:
return (error);
}
@@ -12373,6 +12774,7 @@
{
struct inodedep *inodedep;
struct inoref *inoref;
+ struct ufsmount *ump;
int error, waitfor;
/*
@@ -12388,11 +12790,13 @@
* We give a brief window at the top of the loop to allow
* any pending I/O to complete.
*/
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
for (error = 0, waitfor = MNT_NOWAIT; ; ) {
if (error)
return (error);
- FREE_LOCK(&lk);
- ACQUIRE_LOCK(&lk);
+ FREE_LOCK(ump);
+ ACQUIRE_LOCK(ump);
restart:
if (inodedep_lookup(mp, ino, 0, &inodedep) == 0)
return (0);
@@ -12435,9 +12839,13 @@
{
struct allocdirect *adp;
struct newblk *newblk;
+ struct ufsmount *ump;
struct buf *bp;
- mtx_assert(&lk, MA_OWNED);
+ if ((adp = TAILQ_FIRST(listhead)) == NULL)
+ return (0);
+ ump = VFSTOUFS(adp->ad_list.wk_mp);
+ LOCK_OWNED(ump);
TAILQ_FOREACH(adp, listhead, ad_next) {
newblk = (struct newblk *)adp;
if (newblk->nb_jnewblk != NULL) {
@@ -12447,18 +12855,18 @@
if (newblk->nb_state & DEPCOMPLETE)
continue;
bp = newblk->nb_bmsafemap->sm_buf;
- bp = getdirtybuf(bp, &lk, waitfor);
+ bp = getdirtybuf(bp, LOCK_PTR(ump), waitfor);
if (bp == NULL) {
if (waitfor == MNT_NOWAIT)
continue;
return (1);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (waitfor == MNT_NOWAIT)
bawrite(bp);
else
*errorp = bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return (1);
}
return (0);
@@ -12474,6 +12882,7 @@
ufs_lbn_t lbn;
{
struct newblk *newblk;
+ struct ufsmount *ump;
struct bufobj *bo;
struct inode *ip;
struct buf *bp;
@@ -12486,7 +12895,8 @@
blkno = DIP(ip, i_db[lbn]);
if (blkno == 0)
panic("flush_newblk_dep: Missing block");
- ACQUIRE_LOCK(&lk);
+ ump = VFSTOUFS(mp);
+ ACQUIRE_LOCK(ump);
/*
* Loop until all dependencies related to this block are satisfied.
* We must be careful to restart after each sleep in case a write
@@ -12494,7 +12904,7 @@
*/
for (;;) {
if (newblk_lookup(mp, blkno, 0, &newblk) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
break;
}
if (newblk->nb_list.wk_type != D_ALLOCDIRECT)
@@ -12511,27 +12921,28 @@
*/
if ((newblk->nb_state & DEPCOMPLETE) == 0) {
bp = newblk->nb_bmsafemap->sm_buf;
- bp = getdirtybuf(bp, &lk, MNT_WAIT);
+ bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT);
if (bp == NULL)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
error = bwrite(bp);
if (error)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
}
/*
* Write the buffer.
*/
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
BO_LOCK(bo);
bp = gbincore(bo, lbn);
if (bp != NULL) {
error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
- LK_INTERLOCK, BO_MTX(bo));
+ LK_INTERLOCK, BO_LOCKPTR(bo));
if (error == ENOLCK) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ error = 0;
continue; /* Slept, retry */
}
if (error != 0)
@@ -12553,7 +12964,7 @@
error = ffs_update(vp, 1);
if (error)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
return (error);
}
@@ -12576,8 +12987,11 @@
int error = 0;
struct buf *bp;
ino_t inum;
+ struct diraddhd unfinished;
+ LIST_INIT(&unfinished);
ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
restart:
while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
/*
@@ -12585,17 +12999,29 @@
* has a MKDIR_PARENT dependency.
*/
if (dap->da_state & MKDIR_PARENT) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = ffs_update(pvp, 1)) != 0)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If that cleared dependencies, go on to next.
*/
if (dap != LIST_FIRST(diraddhdp))
continue;
- if (dap->da_state & MKDIR_PARENT)
- panic("flush_pagedep_deps: MKDIR_PARENT");
+ /*
+ * All MKDIR_PARENT dependencies and all the
+ * NEWBLOCK pagedeps that are contained in direct
+ * blocks were resolved by doing above ffs_update.
+ * Pagedeps contained in indirect blocks may
+ * require a complete sync'ing of the directory.
+ * We are in the midst of doing a complete sync,
+ * so if they are not resolved in this pass we
+ * defer them for now as they will be sync'ed by
+ * our caller shortly.
+ */
+ LIST_REMOVE(dap, da_pdlist);
+ LIST_INSERT_HEAD(&unfinished, dap, da_pdlist);
+ continue;
}
/*
* A newly allocated directory must have its "." and
@@ -12617,7 +13043,7 @@
}
}
if (dap->da_state & MKDIR_BODY) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
FFSV_FORCEINSMQ)))
break;
@@ -12632,7 +13058,7 @@
vput(vp);
if (error != 0)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If that cleared dependencies, go on to next.
*/
@@ -12665,13 +13091,13 @@
*/
if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) == 0) {
bp = inodedep->id_bmsafemap->sm_buf;
- bp = getdirtybuf(bp, &lk, MNT_WAIT);
+ bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT);
if (bp == NULL)
goto retry;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(bp)) != 0)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (dap != LIST_FIRST(diraddhdp))
continue;
}
@@ -12681,7 +13107,7 @@
* adjusted update it here to flush it to disk.
*/
if (dap == LIST_FIRST(diraddhdp)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
FFSV_FORCEINSMQ)))
break;
@@ -12689,7 +13115,7 @@
vput(vp);
if (error)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
* If we have failed to get rid of all the dependencies
@@ -12698,11 +13124,16 @@
if (dap == LIST_FIRST(diraddhdp)) {
inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep);
panic("flush_pagedep_deps: failed to flush "
- "inodedep %p ino %d dap %p", inodedep, inum, dap);
+ "inodedep %p ino %ju dap %p",
+ inodedep, (uintmax_t)inum, dap);
}
}
if (error)
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ while ((dap = LIST_FIRST(&unfinished)) != NULL) {
+ LIST_REMOVE(dap, da_pdlist);
+ LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
+ }
return (error);
}
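
The new unfinished list removes a panic: a diradd whose MKDIR_PARENT dependency lives in an indirect block cannot be resolved by ffs_update() alone, so instead of insisting on it (the old code panicked), the entry is parked and handed back to the caller, whose full directory sync will cover it. The park-and-requeue shape, condensed:

	LIST_REMOVE(dap, da_pdlist);
	LIST_INSERT_HEAD(&unfinished, dap, da_pdlist);
	/* ... after the main scan, return the work to its home list ... */
	while ((dap = LIST_FIRST(&unfinished)) != NULL) {
		LIST_REMOVE(dap, da_pdlist);
		LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
	}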
@@ -12721,28 +13152,54 @@
int jlow;
int max_softdeps_hard;
- ACQUIRE_LOCK(&lk);
+ KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
+ ("softdep_slowdown called on non-softdep filesystem"));
+ ump = VFSTOUFS(vp->v_mount);
+ ACQUIRE_LOCK(ump);
jlow = 0;
/*
* Check for journal space if needed.
*/
if (DOINGSUJ(vp)) {
- ump = VFSTOUFS(vp->v_mount);
if (journal_space(ump, 0) == 0)
jlow = 1;
}
+ /*
+ * If the system is under its limits and our filesystem is
+ * not responsible for more than our share of the usage and
+ * we are not low on journal space, then no need to slow down.
+ */
max_softdeps_hard = max_softdeps * 11 / 10;
if (dep_current[D_DIRREM] < max_softdeps_hard / 2 &&
dep_current[D_INODEDEP] < max_softdeps_hard &&
- VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
- dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0) {
- FREE_LOCK(&lk);
+ dep_current[D_INDIRDEP] < max_softdeps_hard / 1000 &&
+ dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0 &&
+ ump->softdep_curdeps[D_DIRREM] <
+ (max_softdeps_hard / 2) / stat_flush_threads &&
+ ump->softdep_curdeps[D_INODEDEP] <
+ max_softdeps_hard / stat_flush_threads &&
+ ump->softdep_curdeps[D_INDIRDEP] <
+ (max_softdeps_hard / 1000) / stat_flush_threads &&
+ ump->softdep_curdeps[D_FREEBLKS] <
+ max_softdeps_hard / stat_flush_threads) {
+ FREE_LOCK(ump);
return (0);
}
- if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps || jlow)
- softdep_speedup();
+ /*
+ * If the journal is low or our filesystem is over its limit
+ * then speedup the cleanup.
+ */
+ if (ump->softdep_curdeps[D_INDIRDEP] <
+ (max_softdeps_hard / 1000) / stat_flush_threads || jlow)
+ softdep_speedup(ump);
stat_sync_limit_hit += 1;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
+ /*
+ * We only slow down the rate at which new dependencies are
+ * generated if we are not using journaling. With journaling,
+ * the cleanup should always be sufficient to keep things
+ * under control.
+ */
if (DOINGSUJ(vp))
return (0);
return (1);
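
The slowdown test is now two-level: the old global ceilings remain, and each filesystem is additionally held to a 1/stat_flush_threads share so that one busy mount cannot consume the whole budget. Worked numbers under assumed settings (max_softdeps = 100000, four flush threads):

	max_softdeps_hard = max_softdeps * 11 / 10;	/* 110000 */
	/* global:  dep_current[D_INODEDEP]     must stay below 110000     */
	/* per-fs:  softdep_curdeps[D_INODEDEP] below 110000 / 4 = 27500   */
	/* indirdeps are far heavier: their cap is 110000 / 1000 = 110     */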
@@ -12795,19 +13252,18 @@
mtx_assert(UFS_MTX(ump), MA_OWNED);
UFS_UNLOCK(ump);
error = ffs_update(vp, 1);
- if (error != 0) {
+ if (error != 0 || MOUNTEDSOFTDEP(mp) == 0) {
UFS_LOCK(ump);
return (0);
}
/*
- * If we are in need of resources, consider pausing for
- * tickdelay to give ourselves some breathing room.
+ * If we are in need of resources, start by cleaning up
+ * any block removals associated with our inode.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
- request_cleanup(UFSTOVFS(ump), resource);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* Now clean up at least as many resources as we will need.
*
@@ -12850,12 +13306,12 @@
fs->fs_cstotal.cs_nbfree <= needed) ||
(resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
fs->fs_cstotal.cs_nifree <= needed)) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (ump->softdep_on_worklist > 0 &&
process_worklist_item(UFSTOVFS(ump),
ump->softdep_on_worklist, LK_NOWAIT) != 0)
stat_worklist_push += 1;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
* If we still need resources and there are no more worklist
@@ -12901,11 +13357,98 @@
return (1);
}
+static bool
+softdep_excess_items(struct ufsmount *ump, int item)
+{
+
+ KASSERT(item >= 0 && item < D_LAST, ("item %d", item));
+ return (dep_current[item] > max_softdeps &&
+ ump->softdep_curdeps[item] > max_softdeps /
+ stat_flush_threads);
+}
+
+static void
+schedule_cleanup(struct mount *mp)
+{
+ struct ufsmount *ump;
+ struct thread *td;
+
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ FREE_LOCK(ump);
+ td = curthread;
+ if ((td->td_pflags & TDP_KTHREAD) != 0 &&
+ (td->td_proc->p_flag2 & P2_AST_SU) == 0) {
+ /*
+ * No ast is delivered to kernel threads, so nobody
+ * would deref the mp. Some kernel threads
+ * explicitly check for AST, e.g. NFS daemon does
+ * this in the serving loop.
+ */
+ return;
+ }
+ if (td->td_su != NULL)
+ vfs_rel(td->td_su);
+ vfs_ref(mp);
+ td->td_su = mp;
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
+}
+
+static void
+softdep_ast_cleanup_proc(void)
+{
+ struct thread *td;
+ struct mount *mp;
+ struct ufsmount *ump;
+ int error;
+ bool req;
+
+ td = curthread;
+ while ((mp = td->td_su) != NULL) {
+ td->td_su = NULL;
+ error = vfs_busy(mp, MBF_NOWAIT);
+ vfs_rel(mp);
+ if (error != 0)
+ return;
+ if (ffs_own_mount(mp) && MOUNTEDSOFTDEP(mp)) {
+ ump = VFSTOUFS(mp);
+ for (;;) {
+ req = false;
+ ACQUIRE_LOCK(ump);
+ if (softdep_excess_items(ump, D_INODEDEP)) {
+ req = true;
+ request_cleanup(mp, FLUSH_INODES);
+ }
+ if (softdep_excess_items(ump, D_DIRREM)) {
+ req = true;
+ request_cleanup(mp, FLUSH_BLOCKS);
+ }
+ FREE_LOCK(ump);
+ if (softdep_excess_items(ump, D_NEWBLK) ||
+ softdep_excess_items(ump, D_ALLOCDIRECT) ||
+ softdep_excess_items(ump, D_ALLOCINDIR)) {
+ error = vn_start_write(NULL, &mp,
+ V_WAIT);
+ if (error == 0) {
+ req = true;
+ VFS_SYNC(mp, MNT_WAIT);
+ vn_finished_write(mp);
+ }
+ }
+ if ((td->td_pflags & TDP_KTHREAD) != 0 || !req)
+ break;
+ }
+ }
+ vfs_unbusy(mp);
+ }
+}
+
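schedule_cleanup() and softdep_ast_cleanup_proc() form a deferral pair: a thread that detects excess dependencies while deep in an allocation path cannot flush them there, so the mount is pinned on td_su and TDF_ASTPENDING is raised; the flush then runs in the thread's own context on its way back to user mode. The userret() hook that invokes the proc routine lives outside this diff and is assumed. The round trip, schematically:

	/* producer, with the per-filesystem lock held: */
	if (softdep_excess_items(ump, D_INODEDEP))
		schedule_cleanup(mp);	/* drops lock, refs mp, sets AST flag */

	/* consumer, on return to user mode: */
	softdep_ast_cleanup_proc();	/* vfs_busy, request_cleanup/VFS_SYNC */
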
/*
* If memory utilization has gotten too high, deliberately slow things
* down and speed up the I/O processing.
*/
-extern struct thread *syncertd;
static int
request_cleanup(mp, resource)
struct mount *mp;
@@ -12914,13 +13457,13 @@
struct thread *td = curthread;
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
/*
* We never hold up the filesystem syncer or buf daemon.
*/
if (td->td_pflags & (TDP_SOFTDEP|TDP_NORUNNINGBUF))
return (0);
- ump = VFSTOUFS(mp);
/*
* First check to see if the work list has gotten backlogged.
* If it has, co-opt this process to help clean up two entries.
@@ -12940,7 +13483,7 @@
* Next, we attempt to speed up the syncer process. If that
* is successful, then we allow the process to continue.
*/
- if (softdep_speedup() &&
+ if (softdep_speedup(ump) &&
resource != FLUSH_BLOCKS_WAIT &&
resource != FLUSH_INODES_WAIT)
return(0);
@@ -12958,15 +13501,19 @@
case FLUSH_INODES:
case FLUSH_INODES_WAIT:
+ ACQUIRE_GBLLOCK(&lk);
stat_ino_limit_push += 1;
req_clear_inodedeps += 1;
+ FREE_GBLLOCK(&lk);
stat_countp = &stat_ino_limit_hit;
break;
case FLUSH_BLOCKS:
case FLUSH_BLOCKS_WAIT:
+ ACQUIRE_GBLLOCK(&lk);
stat_blk_limit_push += 1;
req_clear_remove += 1;
+ FREE_GBLLOCK(&lk);
stat_countp = &stat_blk_limit_hit;
break;
@@ -12977,19 +13524,25 @@
* Hopefully the syncer daemon will catch up and awaken us.
* We wait at most tickdelay before proceeding in any case.
*/
+ ACQUIRE_GBLLOCK(&lk);
+ FREE_LOCK(ump);
proc_waiting += 1;
if (callout_pending(&softdep_callout) == FALSE)
callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
pause_timer, 0);
- msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
+ if ((td->td_pflags & TDP_KTHREAD) == 0)
+ msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
proc_waiting -= 1;
+ FREE_GBLLOCK(&lk);
+ ACQUIRE_LOCK(ump);
return (1);
}
/*
* Awaken processes pausing in request_cleanup and clear proc_waiting
- * to indicate that there is no longer a timer running.
+ * to indicate that there is no longer a timer running. Pause_timer
+ * will be called with the global softdep mutex (&lk) locked.
*/
static void
pause_timer(arg)
@@ -12996,48 +13549,80 @@
void *arg;
{
+ GBLLOCK_OWNED(&lk);
/*
* The callout_ API has acquired mtx and will hold it around this
* function call.
*/
- *stat_countp += 1;
- wakeup_one(&proc_waiting);
- if (proc_waiting > 0)
- callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
- pause_timer, 0);
+ *stat_countp += proc_waiting;
+ wakeup(&proc_waiting);
}
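
pause_timer is simplified to match: it no longer re-arms itself or wakes a single thread; every throttled thread sleeps on proc_waiting under the global &lk and all are released together, the limit counter credited once per sleeper (kernel threads skip the sleep entirely). The waiter side, condensed from request_cleanup above:

	ACQUIRE_GBLLOCK(&lk);
	proc_waiting += 1;
	if (callout_pending(&softdep_callout) == FALSE)
		callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
		    pause_timer, 0);
	msleep(&proc_waiting, &lk, PPAUSE, "softupdate", 0);
	proc_waiting -= 1;
	FREE_GBLLOCK(&lk);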
/*
+ * If requested, try removing inode or removal dependencies.
+ */
+static void
+check_clear_deps(mp)
+ struct mount *mp;
+{
+
+ /*
+ * If we are suspended, it may be because of our using
+ * too many inodedeps, so help clear them out.
+ */
+ if (MOUNTEDSUJ(mp) && VFSTOUFS(mp)->softdep_jblocks->jb_suspended)
+ clear_inodedeps(mp);
+ /*
+ * General requests for cleanup of backed up dependencies
+ */
+ ACQUIRE_GBLLOCK(&lk);
+ if (req_clear_inodedeps) {
+ req_clear_inodedeps -= 1;
+ FREE_GBLLOCK(&lk);
+ clear_inodedeps(mp);
+ ACQUIRE_GBLLOCK(&lk);
+ wakeup(&proc_waiting);
+ }
+ if (req_clear_remove) {
+ req_clear_remove -= 1;
+ FREE_GBLLOCK(&lk);
+ clear_remove(mp);
+ ACQUIRE_GBLLOCK(&lk);
+ wakeup(&proc_waiting);
+ }
+ FREE_GBLLOCK(&lk);
+}
+
+/*
* Flush out a directory with at least one removal dependency in an effort to
* reduce the number of dirrem, freefile, and freeblks dependency structures.
*/
static void
-clear_remove(td)
- struct thread *td;
+clear_remove(mp)
+ struct mount *mp;
{
struct pagedep_hashhead *pagedephd;
struct pagedep *pagedep;
- static int next = 0;
- struct mount *mp;
+ struct ufsmount *ump;
struct vnode *vp;
struct bufobj *bo;
int error, cnt;
ino_t ino;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
- for (cnt = 0; cnt <= pagedep_hash; cnt++) {
- pagedephd = &pagedep_hashtbl[next++];
- if (next > pagedep_hash)
- next = 0;
+ for (cnt = 0; cnt <= ump->pagedep_hash_size; cnt++) {
+ pagedephd = &ump->pagedep_hashtbl[ump->pagedep_nextclean++];
+ if (ump->pagedep_nextclean > ump->pagedep_hash_size)
+ ump->pagedep_nextclean = 0;
LIST_FOREACH(pagedep, pagedephd, pd_hash) {
if (LIST_EMPTY(&pagedep->pd_dirremhd))
continue;
- mp = pagedep->pd_list.wk_mp;
ino = pagedep->pd_ino;
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* Let unmount clear deps
@@ -13061,7 +13646,7 @@
vput(vp);
finish_write:
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return;
}
}
@@ -13072,35 +13657,34 @@
* the number of inodedep dependency structures.
*/
static void
-clear_inodedeps(td)
- struct thread *td;
+clear_inodedeps(mp)
+ struct mount *mp;
{
struct inodedep_hashhead *inodedephd;
struct inodedep *inodedep;
- static int next = 0;
- struct mount *mp;
+ struct ufsmount *ump;
struct vnode *vp;
struct fs *fs;
int error, cnt;
ino_t firstino, lastino, ino;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ LOCK_OWNED(ump);
/*
* Pick a random inode dependency to be cleared.
* We will then gather up all the inodes in its block
* that have dependencies and flush them out.
*/
- for (cnt = 0; cnt <= inodedep_hash; cnt++) {
- inodedephd = &inodedep_hashtbl[next++];
- if (next > inodedep_hash)
- next = 0;
+ for (cnt = 0; cnt <= ump->inodedep_hash_size; cnt++) {
+ inodedephd = &ump->inodedep_hashtbl[ump->inodedep_nextclean++];
+ if (ump->inodedep_nextclean > ump->inodedep_hash_size)
+ ump->inodedep_nextclean = 0;
if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
break;
}
if (inodedep == NULL)
return;
- fs = inodedep->id_fs;
- mp = inodedep->id_list.wk_mp;
/*
* Find the last inode in the block with dependencies.
*/
@@ -13118,11 +13702,11 @@
continue;
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
error = vfs_busy(mp, MBF_NOWAIT); /* Let unmount clear deps */
if (error != 0) {
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return;
}
if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp,
@@ -13130,7 +13714,7 @@
softdep_error("clear_inodedeps: vget", error);
vfs_unbusy(mp);
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return;
}
vfs_unbusy(mp);
@@ -13146,7 +13730,7 @@
}
vput(vp);
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
}
@@ -13156,13 +13740,19 @@
struct workhead *wkhd;
{
struct worklist *wk;
+ struct ufsmount *ump;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(wkhd)) == NULL)
+ return;
+ KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
+ ("softdep_buf_append called on non-softdep filesystem"));
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(wkhd)) != NULL) {
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(&bp->b_dep, wk);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
@@ -13176,10 +13766,13 @@
struct fs *fs;
int error;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_inode_append called on non-softdep filesystem"));
fs = ip->i_fs;
error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int)fs->fs_bsize, cred, &bp);
if (error) {
+ bqrelse(bp);
softdep_freework(wkhd);
return;
}
@@ -13191,10 +13784,17 @@
softdep_freework(wkhd)
struct workhead *wkhd;
{
+ struct worklist *wk;
+ struct ufsmount *ump;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(wkhd)) == NULL)
+ return;
+ KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
+ ("softdep_freework called on non-softdep filesystem"));
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
handle_jwork(wkhd);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -13208,6 +13808,7 @@
int wantcount;
{
struct worklist *wk;
+ struct ufsmount *ump;
struct bmsafemap *bmsafemap;
struct freework *freework;
struct inodedep *inodedep;
@@ -13222,7 +13823,10 @@
int i, retval;
retval = 0;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ return (0);
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
switch (wk->wk_type) {
@@ -13355,7 +13959,7 @@
}
}
out:
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return retval;
}
@@ -13365,42 +13969,41 @@
* Return acquired buffer or NULL on failure.
*/
static struct buf *
-getdirtybuf(bp, mtx, waitfor)
+getdirtybuf(bp, lock, waitfor)
struct buf *bp;
- struct mtx *mtx;
+ struct rwlock *lock;
int waitfor;
{
int error;
- mtx_assert(mtx, MA_OWNED);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) {
if (waitfor != MNT_WAIT)
return (NULL);
error = BUF_LOCK(bp,
- LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, mtx);
+ LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, lock);
/*
* Even if we successfully acquire bp here, we have dropped
- * mtx, which may violates our guarantee.
+ * lock, which may violate our guarantee.
*/
if (error == 0)
BUF_UNLOCK(bp);
else if (error != ENOLCK)
panic("getdirtybuf: inconsistent lock: %d", error);
- mtx_lock(mtx);
+ rw_wlock(lock);
return (NULL);
}
if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
- if (mtx == &lk && waitfor == MNT_WAIT) {
- mtx_unlock(mtx);
+ if (lock != BO_LOCKPTR(bp->b_bufobj) && waitfor == MNT_WAIT) {
+ rw_wunlock(lock);
BO_LOCK(bp->b_bufobj);
BUF_UNLOCK(bp);
if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
bp->b_vflags |= BV_BKGRDWAIT;
- msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj),
+ msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj),
PRIBIO | PDROP, "getbuf", 0);
} else
BO_UNLOCK(bp->b_bufobj);
- mtx_lock(mtx);
+ rw_wlock(lock);
return (NULL);
}
BUF_UNLOCK(bp);
@@ -13407,15 +14010,15 @@
if (waitfor != MNT_WAIT)
return (NULL);
/*
- * The mtx argument must be bp->b_vp's mutex in
+ * The lock argument must be bp->b_vp's mutex in
* this case.
*/
#ifdef DEBUG_VFS_LOCKS
if (bp->b_vp->v_type != VCHR)
- ASSERT_BO_LOCKED(bp->b_bufobj);
+ ASSERT_BO_WLOCKED(bp->b_bufobj);
#endif
bp->b_vflags |= BV_BKGRDWAIT;
- msleep(&bp->b_xflags, mtx, PRIBIO, "getbuf", 0);
+ rw_sleep(&bp->b_xflags, lock, PRIBIO, "getbuf", 0);
return (NULL);
}
if ((bp->b_flags & B_DELWRI) == 0) {
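
getdirtybuf's interlock parameter changes from struct mtx * to struct rwlock * so that both the per-filesystem lock (LOCK_PTR(ump)) and a bufobj lock (BO_LOCKPTR(bo)) can be passed. The contract is unchanged and worth restating: a NULL return means the interlock may have been dropped while sleeping, but it is always re-acquired before return, so callers only need to re-validate state. A typical caller, as seen throughout these hunks:

	bp = getdirtybuf(bmsafemap->sm_buf, LOCK_PTR(ump), MNT_WAIT);
	if (bp == NULL)
		goto retry;	/* slept; dependency state may have changed */
	FREE_LOCK(ump);
	error = bwrite(bp);
	ACQUIRE_LOCK(ump);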
@@ -13436,30 +14039,65 @@
int
softdep_check_suspend(struct mount *mp,
struct vnode *devvp,
- int softdep_deps,
- int softdep_accdeps,
+ int softdep_depcnt,
+ int softdep_accdepcnt,
int secondary_writes,
int secondary_accwrites)
{
struct bufobj *bo;
struct ufsmount *ump;
- int error;
+ struct inodedep *inodedep;
+ int error, unlinked;
- ump = VFSTOUFS(mp);
bo = &devvp->v_bufobj;
- ASSERT_BO_LOCKED(bo);
+ ASSERT_BO_WLOCKED(bo);
+ /*
+ * If we are not running with soft updates, then we need only
+ * deal with secondary writes as we try to suspend.
+ */
+ if (MOUNTEDSOFTDEP(mp) == 0) {
+ MNT_ILOCK(mp);
+ while (mp->mnt_secondary_writes != 0) {
+ BO_UNLOCK(bo);
+ msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
+ (PUSER - 1) | PDROP, "secwr", 0);
+ BO_LOCK(bo);
+ MNT_ILOCK(mp);
+ }
+
+ /*
+ * Reasons for needing more work before suspend:
+ * - Dirty buffers on devvp.
+ * - Secondary writes occurred after start of vnode sync loop
+ */
+ error = 0;
+ if (bo->bo_numoutput > 0 ||
+ bo->bo_dirty.bv_cnt > 0 ||
+ secondary_writes != 0 ||
+ mp->mnt_secondary_writes != 0 ||
+ secondary_accwrites != mp->mnt_secondary_accwrites)
+ error = EAGAIN;
+ BO_UNLOCK(bo);
+ return (error);
+ }
+
+ /*
+ * If we are running with soft updates, then we need to coordinate
+ * with them as we try to suspend.
+ */
+ ump = VFSTOUFS(mp);
for (;;) {
- if (!TRY_ACQUIRE_LOCK(&lk)) {
+ if (!TRY_ACQUIRE_LOCK(ump)) {
BO_UNLOCK(bo);
- ACQUIRE_LOCK(&lk);
- FREE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ FREE_LOCK(ump);
BO_LOCK(bo);
continue;
}
MNT_ILOCK(mp);
if (mp->mnt_secondary_writes != 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes,
MNT_MTX(mp),
@@ -13470,6 +14108,20 @@
break;
}
+ unlinked = 0;
+ if (MOUNTEDSUJ(mp)) {
+ for (inodedep = TAILQ_FIRST(&ump->softdep_unlinked);
+ inodedep != NULL;
+ inodedep = TAILQ_NEXT(inodedep, id_unlinked)) {
+ if ((inodedep->id_state & (UNLINKED | UNLINKLINKS |
+ UNLINKONLIST)) != (UNLINKED | UNLINKLINKS |
+ UNLINKONLIST) ||
+ !check_inodedep_free(inodedep))
+ continue;
+ unlinked++;
+ }
+ }
+
/*
* Reasons for needing more work before suspend:
* - Dirty buffers on devvp.
@@ -13479,14 +14131,14 @@
error = 0;
if (bo->bo_numoutput > 0 ||
bo->bo_dirty.bv_cnt > 0 ||
- softdep_deps != 0 ||
- ump->softdep_deps != 0 ||
- softdep_accdeps != ump->softdep_accdeps ||
+ softdep_depcnt != unlinked ||
+ ump->softdep_deps != unlinked ||
+ softdep_accdepcnt != ump->softdep_accdeps ||
secondary_writes != 0 ||
mp->mnt_secondary_writes != 0 ||
secondary_accwrites != mp->mnt_secondary_accwrites)
error = EAGAIN;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
BO_UNLOCK(bo);
return (error);
}
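
The suspension check now tolerates exactly one class of residual dependency: inodes that are fully unlinked (UNLINKED | UNLINKLINKS | UNLINKONLIST) and freeable. With assumed counts, if three such inodedeps remain then unlinked == 3, and suspension proceeds only when both dependency counters agree with it:

	/* 3 unlinked-only inodedeps: counts match, suspension may proceed */
	if (softdep_depcnt != unlinked || ump->softdep_deps != unlinked)
		error = EAGAIN;		/* anything else outstanding: retry */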
@@ -13504,11 +14156,16 @@
{
struct ufsmount *ump;
+ if (MOUNTEDSOFTDEP(mp) == 0) {
+ *softdep_depsp = 0;
+ *softdep_accdepsp = 0;
+ return;
+ }
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
*softdep_depsp = ump->softdep_deps;
*softdep_accdepsp = ump->softdep_accdeps;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -13525,12 +14182,12 @@
bo = &vp->v_bufobj;
ASSERT_VOP_LOCKED(vp, "drain_output");
- ASSERT_BO_LOCKED(bo);
+ ASSERT_BO_WLOCKED(bo);
while (bo->bo_numoutput) {
bo->bo_flag |= BO_WWAIT;
msleep((caddr_t)&bo->bo_numoutput,
- BO_MTX(bo), PRIBIO + 1, "drainvp", 0);
+ BO_LOCKPTR(bo), PRIBIO + 1, "drainvp", 0);
}
}
@@ -13546,8 +14203,13 @@
if ((bp->b_ioflags & BIO_ERROR) == 0)
panic("softdep_deallocate_dependencies: dangling deps");
- softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
- panic("softdep_deallocate_dependencies: unrecovered I/O error");
+ if (bp->b_vp != NULL && bp->b_vp->v_mount != NULL)
+ softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
+ else
+ printf("softdep_deallocate_dependencies: "
+ "got error %d while accessing filesystem\n", bp->b_error);
+ if (bp->b_error != ENXIO)
+ panic("softdep_deallocate_dependencies: unrecovered I/O error");
}
/*
@@ -13608,15 +14270,17 @@
{
struct inodedep_hashhead *inodedephd;
struct inodedep *inodedep;
- struct fs *fs;
+ struct ufsmount *ump;
int cnt;
- fs = have_addr ? (struct fs *)addr : NULL;
- for (cnt = 0; cnt < inodedep_hash; cnt++) {
- inodedephd = &inodedep_hashtbl[cnt];
+ if (have_addr == 0) {
+ db_printf("Address required\n");
+ return;
+ }
+ ump = (struct ufsmount *)addr;
+ for (cnt = 0; cnt < ump->inodedep_hash_size; cnt++) {
+ inodedephd = &ump->inodedep_hashtbl[cnt];
LIST_FOREACH(inodedep, inodedephd, id_hash) {
- if (fs != NULL && fs != inodedep->id_fs)
- continue;
inodedep_print(inodedep, 0);
}
}
@@ -13658,11 +14322,17 @@
DB_SHOW_COMMAND(mkdirs, db_show_mkdirs)
{
+ struct mkdirlist *mkdirlisthd;
struct jaddref *jaddref;
struct diradd *diradd;
struct mkdir *mkdir;
- LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) {
+ if (have_addr == 0) {
+ db_printf("Address required\n");
+ return;
+ }
+ mkdirlisthd = (struct mkdirlist *)addr;
+ LIST_FOREACH(mkdir, mkdirlisthd, md_mkdirs) {
diradd = mkdir->md_diradd;
db_printf("mkdir: %p state 0x%X dap %p state 0x%X",
mkdir, mkdir->md_state, diradd, diradd->da_state);
@@ -13673,6 +14343,17 @@
}
}
+/* exported to ffs_vfsops.c */
+extern void db_print_ffs(struct ufsmount *ump);
+void
+db_print_ffs(struct ufsmount *ump)
+{
+ db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n",
+ ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
+ ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
+ ump->softdep_deps, ump->softdep_req);
+}
+
#endif /* DDB */
#endif /* SOFTUPDATES */
Modified: trunk/sys/ufs/ffs/ffs_subr.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_subr.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_subr.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_subr.c 207141 2010-04-24 07:05:35Z jeff $");
#include <sys/param.h>
Modified: trunk/sys/ufs/ffs/ffs_suspend.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_suspend.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_suspend.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -27,11 +27,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/ffs_suspend.c 306175 2016-09-22 10:42:40Z kib $
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_suspend.c 306175 2016-09-22 10:42:40Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -177,7 +177,6 @@
static int
ffs_susp_suspend(struct mount *mp)
{
- struct fs *fs;
struct ufsmount *ump;
int error;
@@ -189,7 +188,6 @@
return (EBUSY);
ump = VFSTOUFS(mp);
- fs = ump->um_fs;
/*
* Make sure the calling thread is permitted to access the mounted
@@ -207,7 +205,7 @@
return (EPERM);
#endif
- if ((error = vfs_write_suspend(mp)) != 0)
+ if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0)
return (error);
ump->um_writesuspended = 1;
@@ -237,7 +235,7 @@
KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0,
("MNTK_SUSPEND not set"));
- error = ffs_reload(mp, curthread, 1);
+ error = ffs_reload(mp, curthread, FFSR_FORCE | FFSR_UNSUSPEND);
if (error != 0)
panic("failed to unsuspend writes on %s", fs->fs_fsmnt);
@@ -253,7 +251,7 @@
*/
mp->mnt_susp_owner = curthread;
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
vfs_unbusy(mp);
ump->um_writesuspended = 0;
Modified: trunk/sys/ufs/ffs/ffs_tables.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_tables.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_tables.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_tables.c 139825 2005-01-07 02:29:27Z imp $");
#include <sys/param.h>
#include <ufs/ufs/dinode.h>
Modified: trunk/sys/ufs/ffs/ffs_vfsops.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_vfsops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_vfsops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_vfsops.c 309208 2016-11-27 09:14:52Z kib $");
#include "opt_quota.h"
#include "opt_ufs.h"
@@ -42,6 +43,7 @@
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/taskqueue.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
@@ -52,6 +54,7 @@
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/rwlock.h>
#include <security/mac/mac_framework.h>
@@ -142,10 +145,10 @@
{
struct vnode *devvp;
struct thread *td;
- struct ufsmount *ump = 0;
+ struct ufsmount *ump = NULL;
struct fs *fs;
pid_t fsckpid = 0;
- int error, flags;
+ int error, error1, flags;
uint64_t mntorflags;
accmode_t accmode;
struct nameidata ndp;
@@ -254,31 +257,9 @@
*/
if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
return (error);
- for (;;) {
- vn_finished_write(mp);
- if ((error = vfs_write_suspend(mp)) != 0)
- return (error);
- MNT_ILOCK(mp);
- if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
- /*
- * Allow the secondary writes
- * to proceed.
- */
- mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
- MNTK_SUSPEND2);
- wakeup(&mp->mnt_flag);
- MNT_IUNLOCK(mp);
- /*
- * Allow the curthread to
- * ignore the suspension to
- * synchronize on-disk state.
- */
- td->td_pflags |= TDP_IGNSUSP;
- break;
- }
- MNT_IUNLOCK(mp);
- vn_start_write(NULL, &mp, V_WAIT);
- }
+ error = vfs_write_suspend_umnt(mp);
+ if (error != 0)
+ return (error);
/*
* Check for and optionally get rid of files open
* for writing.
@@ -292,7 +273,7 @@
error = ffs_flushfiles(mp, flags, td);
}
if (error) {
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
return (error);
}
if (fs->fs_pendingblocks != 0 ||
@@ -309,7 +290,7 @@
if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
fs->fs_ronly = 0;
fs->fs_clean = 0;
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
return (error);
}
if (MOUNTEDSOFTDEP(mp))
@@ -330,7 +311,7 @@
* Allow the writers to note that filesystem
* is ro now.
*/
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
}
if ((mp->mnt_flag & MNT_RELOAD) &&
(error = ffs_reload(mp, td, 0)) != 0)
@@ -481,6 +462,11 @@
*/
if (mp->mnt_flag & MNT_SNAPSHOT)
return (ffs_snapshot(mp, fspec));
+
+ /*
+ * Must not call namei() while owning busy ref.
+ */
+ vfs_unbusy(mp);
}
/*
@@ -488,7 +474,18 @@
* and verify that it refers to a sensible disk device.
*/
NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
- if ((error = namei(&ndp)) != 0)
+ error = namei(&ndp);
+ if ((mp->mnt_flag & MNT_UPDATE) != 0) {
+ /*
+ * Unmount does not start if MNT_UPDATE is set. Mount
+ * update busies mp before setting MNT_UPDATE. We
+ * must be able to retain our busy ref succesfully,
+ * without sleep.
+ */
+ error1 = vfs_busy(mp, MBF_NOWAIT);
+ MPASS(error1 == 0);
+ }
+ if (error != 0)
return (error);
NDFREE(&ndp, NDF_ONLY_PNBUF);
devvp = ndp.ni_vp;
@@ -532,7 +529,7 @@
* We need the name for the mount point (also used for
* "last mounted on") copied in. If an error occurs,
* the mount point is discarded by the upper level code.
- * Note that vfs_mount() populates f_mntonname for us.
+ * Note that vfs_mount_alloc() populates f_mntonname for us.
*/
if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
vrele(devvp);
@@ -603,11 +600,13 @@
* 2) re-read superblock from disk.
* 3) re-read summary information from disk.
* 4) invalidate all inactive vnodes.
- * 5) invalidate all cached file data.
- * 6) re-read inode data for all active vnodes.
+ * 5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
+ * writers, if requested.
+ * 6) invalidate all cached file data.
+ * 7) re-read inode data for all active vnodes.
*/
int
-ffs_reload(struct mount *mp, struct thread *td, int force)
+ffs_reload(struct mount *mp, struct thread *td, int flags)
{
struct vnode *vp, *mvp, *devvp;
struct inode *ip;
@@ -616,13 +615,14 @@
struct fs *fs, *newfs;
struct ufsmount *ump;
ufs2_daddr_t sblockloc;
- int i, blks, size, error;
+ int i, blks, error;
+ u_long size;
int32_t *lp;
ump = VFSTOUFS(mp);
MNT_ILOCK(mp);
- if ((mp->mnt_flag & MNT_RDONLY) == 0 && force == 0) {
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
MNT_IUNLOCK(mp);
return (EINVAL);
}
@@ -686,7 +686,7 @@
size += fs->fs_ncg * sizeof(int32_t);
size += fs->fs_ncg * sizeof(u_int8_t);
free(fs->fs_csp, M_UFSMNT);
- space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
+ space = malloc(size, M_UFSMNT, M_WAITOK);
fs->fs_csp = space;
for (i = 0; i < blks; i += fs->fs_frag) {
size = fs->fs_bsize;
@@ -712,6 +712,12 @@
size = fs->fs_ncg * sizeof(u_int8_t);
fs->fs_contigdirs = (u_int8_t *)space;
bzero(fs->fs_contigdirs, size);
+ if ((flags & FFSR_UNSUSPEND) != 0) {
+ MNT_ILOCK(mp);
+ mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
+ wakeup(&mp->mnt_flag);
+ MNT_IUNLOCK(mp);
+ }
loop:
MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
@@ -773,7 +779,8 @@
struct cdev *dev;
void *space;
ufs2_daddr_t sblockloc;
- int error, i, blks, size, ronly;
+ int error, i, blks, len, ronly;
+ u_long size;
int32_t *lp;
struct ucred *cred;
struct g_consumer *cp;
@@ -784,23 +791,31 @@
cred = td ? td->td_ucred : NOCRED;
ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+ KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
dev = devvp->v_rdev;
- dev_ref(dev);
+ if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
+ (uintptr_t)mp) == 0) {
+ VOP_UNLOCK(devvp, 0);
+ return (EBUSY);
+ }
DROP_GIANT();
g_topology_lock();
error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
g_topology_unlock();
PICKUP_GIANT();
+ if (error != 0) {
+ atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
+ VOP_UNLOCK(devvp, 0);
+ return (error);
+ }
+ dev_ref(dev);
+ devvp->v_bufobj.bo_ops = &ffs_ops;
VOP_UNLOCK(devvp, 0);
- if (error)
- goto out;
- if (devvp->v_rdev->si_iosize_max != 0)
- mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
+ if (dev->si_iosize_max != 0)
+ mp->mnt_iosize_max = dev->si_iosize_max;
if (mp->mnt_iosize_max > MAXPHYS)
mp->mnt_iosize_max = MAXPHYS;
- devvp->v_bufobj.bo_ops = &ffs_ops;
-
fs = NULL;
sblockloc = 0;
/*
@@ -872,11 +887,11 @@
/*
* Get journal provider name.
*/
- size = 1024;
- mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
- if (g_io_getattr("GJOURNAL::provider", cp, &size,
+ len = 1024;
+ mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK);
+ if (g_io_getattr("GJOURNAL::provider", cp, &len,
mp->mnt_gjprovider) == 0) {
- mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
+ mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
M_UFSMNT, M_WAITOK);
MNT_ILOCK(mp);
mp->mnt_flag |= MNT_GJOURNAL;
@@ -928,7 +943,7 @@
if (fs->fs_contigsumsize > 0)
size += fs->fs_ncg * sizeof(int32_t);
size += fs->fs_ncg * sizeof(u_int8_t);
- space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
+ space = malloc(size, M_UFSMNT, M_WAITOK);
fs->fs_csp = space;
for (i = 0; i < blks; i += fs->fs_frag) {
size = fs->fs_bsize;
@@ -1013,8 +1028,8 @@
#endif
}
if ((fs->fs_flags & FS_TRIM) != 0) {
- size = sizeof(int);
- if (g_io_getattr("GEOM::candelete", cp, &size,
+ len = sizeof(int);
+ if (g_io_getattr("GEOM::candelete", cp, &len,
&ump->um_candelete) == 0) {
if (!ump->um_candelete)
printf("WARNING: %s: TRIM flag on fs but disk "
@@ -1026,6 +1041,12 @@
mp->mnt_stat.f_mntonname);
ump->um_candelete = 0;
}
+ if (ump->um_candelete) {
+ ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
+ taskqueue_thread_enqueue, &ump->um_trim_tq);
+ taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
+ "%s trim", mp->mnt_stat.f_mntonname);
+ }
}
ump->um_mountp = mp;
@@ -1063,8 +1084,6 @@
ffs_flushfiles(mp, FORCECLOSE, td);
goto out;
}
- if (devvp->v_type == VCHR && devvp->v_rdev != NULL)
- devvp->v_rdev->si_mountpt = mp;
if (fs->fs_snapinum[0] != 0)
ffs_snapshot_mount(mp);
fs->fs_fmod = 1;
@@ -1072,11 +1091,11 @@
(void) ffs_sbupdate(ump, MNT_WAIT, 0);
}
/*
- * Initialize filesystem stat information in mount struct.
+ * Initialize filesystem state information in mount struct.
*/
MNT_ILOCK(mp);
- mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
- MNTK_EXTENDED_SHARED | MNTK_NO_IOPF;
+ mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
+ MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
@@ -1114,6 +1133,7 @@
free(ump, M_UFSMNT);
mp->mnt_data = NULL;
}
+ atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
dev_rel(dev);
return (error);
}
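
The hunks above also change how the mount takes ownership of the device: instead of assigning si_mountpt after a successful mount, ffs_mountfs() now claims it up front with an atomic compare-and-set, so a second mount of the same device fails cleanly with EBUSY. A minimal sketch of the claim/release protocol, condensed from the changes above:

    /* Claim the device for mount mp; fail if another mount owns it. */
    if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
        (uintptr_t)mp) == 0)
            return (EBUSY);

    /* ... mount proceeds ... */

    /* On any failure path, and again at unmount, release the claim. */
    atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);

The acquire/release pairing ensures that whoever wins the compare-and-set also observes a device free of the previous mount's stores.
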
@@ -1234,7 +1254,7 @@
susp = 0;
if (mntflags & MNT_FORCE) {
flags |= FORCECLOSE;
- susp = fs->fs_ronly != 0;
+ susp = fs->fs_ronly == 0;
}
#ifdef UFS_EXTATTR
if ((error = ufs_extattr_stop(mp, td))) {
@@ -1249,25 +1269,9 @@
}
#endif
if (susp) {
- /*
- * dounmount already called vn_start_write().
- */
- for (;;) {
- vn_finished_write(mp);
- if ((error = vfs_write_suspend(mp)) != 0)
- return (error);
- MNT_ILOCK(mp);
- if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
- mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
- MNTK_SUSPEND2);
- wakeup(&mp->mnt_flag);
- MNT_IUNLOCK(mp);
- td->td_pflags |= TDP_IGNSUSP;
- break;
- }
- MNT_IUNLOCK(mp);
- vn_start_write(NULL, &mp, V_WAIT);
- }
+ error = vfs_write_suspend_umnt(mp);
+ if (error != 0)
+ goto fail1;
}
if (MOUNTEDSOFTDEP(mp))
error = softdep_flushfiles(mp, flags, td);
@@ -1285,7 +1289,8 @@
fs->fs_pendinginodes = 0;
}
UFS_UNLOCK(ump);
- softdep_unmount(mp);
+ if (MOUNTEDSOFTDEP(mp))
+ softdep_unmount(mp);
if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
error = ffs_sbupdate(ump, MNT_WAIT, 0);
@@ -1294,9 +1299,13 @@
goto fail;
}
}
- if (susp) {
- vfs_write_resume(mp);
- vn_start_write(NULL, &mp, V_WAIT);
+ if (susp)
+ vfs_write_resume(mp, VR_START_WRITE);
+ if (ump->um_trim_tq != NULL) {
+ while (ump->um_trim_inflight != 0)
+ pause("ufsutr", hz);
+ taskqueue_drain_all(ump->um_trim_tq);
+ taskqueue_free(ump->um_trim_tq);
}
DROP_GIANT();
g_topology_lock();
@@ -1310,8 +1319,7 @@
g_vfs_close(ump->um_cp);
g_topology_unlock();
PICKUP_GIANT();
- if (ump->um_devvp->v_type == VCHR && ump->um_devvp->v_rdev != NULL)
- ump->um_devvp->v_rdev->si_mountpt = NULL;
+ atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
vrele(ump->um_devvp);
dev_rel(ump->um_dev);
mtx_destroy(UFS_MTX(ump));
@@ -1329,10 +1337,9 @@
return (error);
fail:
- if (susp) {
- vfs_write_resume(mp);
- vn_start_write(NULL, &mp, V_WAIT);
- }
+ if (susp)
+ vfs_write_resume(mp, VR_START_WRITE);
+fail1:
#ifdef UFS_EXTATTR
if (e_restart) {
ufs_extattr_uepm_init(&ump->um_extattr);
@@ -1448,6 +1455,14 @@
return (0);
}
+static bool
+sync_doupdate(struct inode *ip)
+{
+
+ return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
+ IN_UPDATE)) != 0);
+}
+
/*
* For a lazy sync, we only care about access times, quotas and the
* superblock. Other filesystem changes are already converted to
@@ -1481,8 +1496,7 @@
* Test also all the other timestamp flags too, to pick up
* any other cases that could be missed.
*/
- if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
- IN_UPDATE)) == 0) {
+ if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
VI_UNLOCK(vp);
continue;
}
@@ -1489,7 +1503,8 @@
if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
td)) != 0)
continue;
- error = ffs_update(vp, 0);
+ if (sync_doupdate(ip))
+ error = ffs_update(vp, 0);
if (error != 0)
allerror = error;
vput(vp);
@@ -1524,7 +1539,7 @@
struct inode *ip;
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs;
- int error, count, wait, lockreq, allerror = 0;
+ int error, count, lockreq, allerror = 0;
int suspend;
int suspended;
int secondary_writes;
@@ -1533,7 +1548,6 @@
int softdep_accdeps;
struct bufobj *bo;
- wait = 0;
suspend = 0;
suspended = 0;
td = curthread;
@@ -1541,8 +1555,11 @@
if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
panic("%s: ffs_sync: modification on read-only filesystem",
fs->fs_fsmnt);
- if (waitfor == MNT_LAZY)
- return (ffs_sync_lazy(mp));
+ if (waitfor == MNT_LAZY) {
+ if (!rebooting)
+ return (ffs_sync_lazy(mp));
+ waitfor = MNT_NOWAIT;
+ }
/*
* Write back each (modified) inode.
@@ -1552,10 +1569,8 @@
suspend = 1;
waitfor = MNT_WAIT;
}
- if (waitfor == MNT_WAIT) {
- wait = 1;
+ if (waitfor == MNT_WAIT)
lockreq = LK_EXCLUSIVE;
- }
lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
loop:
/* Grab snapshot of secondary write counts */
@@ -1599,7 +1614,7 @@
/*
* Force stale filesystem control information to be flushed.
*/
- if (waitfor == MNT_WAIT) {
+ if (waitfor == MNT_WAIT || rebooting) {
if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
allerror = error;
/* Flushed work items may create new vnodes to clean */
@@ -1616,9 +1631,12 @@
if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
BO_UNLOCK(bo);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
- if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
+ error = VOP_FSYNC(devvp, waitfor, td);
+ VOP_UNLOCK(devvp, 0);
+ if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
+ error = ffs_sbupdate(ump, waitfor, 0);
+ if (error != 0)
allerror = error;
- VOP_UNLOCK(devvp, 0);
if (allerror == 0 && waitfor == MNT_WAIT)
goto loop;
} else if (suspend != 0) {
@@ -1679,7 +1697,7 @@
/*
* We must promote to an exclusive lock for vnode creation. This
* can happen if lookup is passed LOCKSHARED.
- */
+ */
if ((flags & LK_TYPE_MASK) == LK_SHARED) {
flags &= ~LK_TYPE_MASK;
flags |= LK_EXCLUSIVE;
@@ -1695,21 +1713,11 @@
ump = VFSTOUFS(mp);
dev = ump->um_dev;
fs = ump->um_fs;
-
- /*
- * If this malloc() is performed after the getnewvnode()
- * it might block, leaving a vnode with a NULL v_data to be
- * found by ffs_sync() if a sync happens to fire right then,
- * which will cause a panic because ffs_sync() blindly
- * dereferences vp->v_data (as well it should).
- */
ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
/* Allocate a new vnode/inode. */
- if (fs->fs_magic == FS_UFS1_MAGIC)
- error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
- else
- error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
+ error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
+ &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
if (error) {
*vpp = NULL;
uma_zfree(uma_inode, ip);
@@ -1728,6 +1736,7 @@
ip->i_dev = dev;
ip->i_number = ino;
ip->i_ea_refs = 0;
+ ip->i_nextclustercg = -1;
#ifdef QUOTA
{
int i;
@@ -1954,7 +1963,7 @@
}
fs->fs_fmod = 0;
fs->fs_time = time_second;
- if (fs->fs_flags & FS_DOSOFTDEP)
+ if (MOUNTEDSOFTDEP(ump->um_mountp))
softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
@@ -2012,13 +2021,19 @@
BO_LOCK(bufobj);
if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
- /* Grab an extra reference to be dropped by the bufdone() below. */
- bufobj_wrefl(bufobj);
+
+ /*
+ * We should mark the cylinder group buffer origbp as
+	 * dirty, so as not to lose the failed write.
+ */
+ if ((bp->b_ioflags & BIO_ERROR) != 0)
+ origbp->b_vflags |= BV_BKGRDERR;
BO_UNLOCK(bufobj);
/*
* Process dependencies then return any unfinished ones.
*/
- if (!LIST_EMPTY(&bp->b_dep))
+ pbrelvp(bp);
+ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
buf_complete(bp);
#ifdef SOFTUPDATES
if (!LIST_EMPTY(&bp->b_dep))
@@ -2030,6 +2045,15 @@
*/
bp->b_flags |= B_NOCACHE;
bp->b_flags &= ~B_CACHE;
+
+ /*
+	 * Prevent brelse() from trying to keep and re-dirty bp on
+ * errors. It causes b_bufobj dereference in
+ * bdirty()/reassignbuf(), and b_bufobj was cleared in
+ * pbrelvp() above.
+ */
+ if ((bp->b_ioflags & BIO_ERROR) != 0)
+ bp->b_flags |= B_INVAL;
bufdone(bp);
BO_LOCK(bufobj);
/*
@@ -2063,7 +2087,6 @@
static int
ffs_bufwrite(struct buf *bp)
{
- int oldflags, s;
struct buf *newbp;
CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
@@ -2072,11 +2095,8 @@
return (0);
}
- oldflags = bp->b_flags;
-
if (!BUF_ISLOCKED(bp))
panic("bufwrite: buffer is not busy???");
- s = splbio();
/*
* If a background write is already in progress, delay
* writing this block if it is asynchronous. Otherwise
@@ -2086,15 +2106,16 @@
if (bp->b_vflags & BV_BKGRDINPROG) {
if (bp->b_flags & B_ASYNC) {
BO_UNLOCK(bp->b_bufobj);
- splx(s);
bdwrite(bp);
return (0);
}
bp->b_vflags |= BV_BKGRDWAIT;
- msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
+ msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
+ "bwrbg", 0);
if (bp->b_vflags & BV_BKGRDINPROG)
panic("bufwrite: still writing");
}
+ bp->b_vflags &= ~BV_BKGRDERR;
BO_UNLOCK(bp->b_bufobj);
/*
@@ -2117,24 +2138,19 @@
if (newbp == NULL)
goto normal_write;
- /*
- * set it to be identical to the old block. We have to
- * set b_lblkno and BKGRDMARKER before calling bgetvp()
- * to avoid confusing the splay tree and gbincore().
- */
+ KASSERT((bp->b_flags & B_UNMAPPED) == 0, ("Unmapped cg"));
memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
- newbp->b_lblkno = bp->b_lblkno;
- newbp->b_xflags |= BX_BKGRDMARKER;
BO_LOCK(bp->b_bufobj);
bp->b_vflags |= BV_BKGRDINPROG;
- bgetvp(bp->b_vp, newbp);
BO_UNLOCK(bp->b_bufobj);
- newbp->b_bufobj = &bp->b_vp->v_bufobj;
+ newbp->b_xflags |= BX_BKGRDMARKER;
+ newbp->b_lblkno = bp->b_lblkno;
newbp->b_blkno = bp->b_blkno;
newbp->b_offset = bp->b_offset;
newbp->b_iodone = ffs_backgroundwritedone;
newbp->b_flags |= B_ASYNC;
newbp->b_flags &= ~B_INVAL;
+ pbgetvp(bp->b_vp, newbp);
#ifdef SOFTUPDATES
/*
@@ -2150,12 +2166,9 @@
#endif
/*
- * Initiate write on the copy, release the original to
- * the B_LOCKED queue so that it cannot go away until
- * the background write completes. If not locked it could go
- * away and then be reconstituted while it was being written.
- * If the reconstituted buffer were written, we could end up
- * with two background copies being written at the same time.
+ * Initiate write on the copy, release the original. The
+ * BKGRDINPROG flag prevents it from going away until
+ * the background write completes.
*/
bqrelse(bp);
bp = newbp;
@@ -2241,15 +2254,10 @@
}
#ifdef DDB
+#ifdef SOFTUPDATES
-static void
-db_print_ffs(struct ufsmount *ump)
-{
- db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n",
- ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
- ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
- ump->softdep_deps, ump->softdep_req);
-}
+/* defined in ffs_softdep.c */
+extern void db_print_ffs(struct ufsmount *ump);
DB_SHOW_COMMAND(ffs, db_show_ffs)
{
@@ -2268,4 +2276,5 @@
}
}
+#endif /* SOFTUPDATES */
#endif /* DDB */
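
Taken together, the ffs_vfsops.c changes above give TRIM its own single-threaded taskqueue: created in ffs_mountfs() once GEOM::candelete confirms the device supports BIO_DELETE, and drained and freed in ffs_unmount() after the last trim completes. A sketch of the lifecycle using the names from the patch (um_trim_tq, um_trim_inflight); the enqueue side lives in ffs_alloc.c, which is not part of these hunks, so the task setup shown in the middle is a hypothetical illustration:

    /* Mount: one kthread servicing deferred BIO_DELETE requests. */
    ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
        taskqueue_thread_enqueue, &ump->um_trim_tq);
    taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
        "%s trim", mp->mnt_stat.f_mntonname);

    /* Block free (ffs_alloc.c, not shown): hand the trim to the
     * queue instead of issuing it synchronously.  ffs_do_trim and
     * its context ctx are hypothetical names. */
    atomic_add_int(&ump->um_trim_inflight, 1);
    TASK_INIT(&ctx->task, 0, ffs_do_trim, ctx);
    taskqueue_enqueue(ump->um_trim_tq, &ctx->task);

    /* Unmount: wait out in-flight trims, then drain and free. */
    while (ump->um_trim_inflight != 0)
        pause("ufsutr", hz);
    taskqueue_drain_all(ump->um_trim_tq);
    taskqueue_free(ump->um_trim_tq);
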
Modified: trunk/sys/ufs/ffs/ffs_vnops.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_vnops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_vnops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
* All rights reserved.
@@ -57,12 +58,12 @@
* SUCH DAMAGE.
*
* from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
- * from: $MidnightBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
+ * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
* @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_vnops.c 284201 2015-06-10 02:14:33Z kib $");
#include <sys/param.h>
#include <sys/bio.h>
@@ -75,6 +76,7 @@
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
+#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
@@ -199,8 +201,8 @@
* bo_dirty list. Recheck and resync as needed.
*/
BO_LOCK(bo);
- if (vp->v_type == VREG && (bo->bo_numoutput > 0 ||
- bo->bo_dirty.bv_cnt > 0)) {
+ if ((vp->v_type == VREG || vp->v_type == VDIR) &&
+ (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
BO_UNLOCK(bo);
goto retry;
}
@@ -258,9 +260,17 @@
continue;
if (bp->b_lblkno > lbn)
panic("ffs_syncvnode: syncing truncated data.");
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
+ BO_UNLOCK(bo);
+ } else if (wait != 0) {
+ if (BUF_LOCK(bp,
+ LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+ BO_LOCKPTR(bo)) != 0) {
+ bp->b_vflags &= ~BV_SCANNED;
+ goto next;
+ }
+ } else
continue;
- BO_UNLOCK(bo);
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
/*
@@ -508,7 +518,8 @@
/*
* Don't do readahead if this is the end of the file.
*/
- error = bread(vp, lbn, size, NOCRED, &bp);
+ error = bread_gb(vp, lbn, size, NOCRED,
+ GB_UNMAPPED, &bp);
} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
/*
* Otherwise if we are allowed to cluster,
@@ -519,7 +530,7 @@
*/
error = cluster_read(vp, ip->i_size, lbn,
size, NOCRED, blkoffset + uio->uio_resid,
- seqcount, &bp);
+ seqcount, GB_UNMAPPED, &bp);
} else if (seqcount > 1) {
/*
* If we are NOT allowed to cluster, then
@@ -529,9 +540,9 @@
* arguments point to arrays of the size specified in
* the 6th argument.
*/
- int nextsize = blksize(fs, ip, nextlbn);
- error = breadn(vp, lbn,
- size, &nextlbn, &nextsize, 1, NOCRED, &bp);
+ u_int nextsize = blksize(fs, ip, nextlbn);
+ error = breadn_flags(vp, lbn, size, &nextlbn,
+ &nextsize, 1, NOCRED, GB_UNMAPPED, &bp);
} else {
/*
* Failing all of the above, just read what the
@@ -538,7 +549,8 @@
* user asked for. Interestingly, the same as
* the first option above.
*/
- error = bread(vp, lbn, size, NOCRED, &bp);
+ error = bread_gb(vp, lbn, size, NOCRED,
+ GB_UNMAPPED, &bp);
}
if (error) {
brelse(bp);
@@ -569,8 +581,13 @@
xfersize = size;
}
- error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset,
- (int)xfersize, uio);
+ if ((bp->b_flags & B_UNMAPPED) == 0) {
+ error = vn_io_fault_uiomove((char *)bp->b_data +
+ blkoffset, (int)xfersize, uio);
+ } else {
+ error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
+ (int)xfersize, uio);
+ }
if (error)
break;
@@ -612,7 +629,7 @@
}
if ((error == 0 || uio->uio_resid != orig_resid) &&
- (vp->v_mount->mnt_flag & MNT_NOATIME) == 0 &&
+ (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 &&
(ip->i_flag & IN_ACCESS) == 0) {
VI_LOCK(vp);
ip->i_flag |= IN_ACCESS;
@@ -701,6 +718,7 @@
flags = seqcount << BA_SEQSHIFT;
if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
flags |= IO_SYNC;
+ flags |= BA_UNMAPPED;
for (error = 0; uio->uio_resid > 0;) {
lbn = lblkno(fs, uio->uio_offset);
@@ -711,10 +729,10 @@
if (uio->uio_offset + xfersize > ip->i_size)
vnode_pager_setsize(vp, uio->uio_offset + xfersize);
- /*
+ /*
* We must perform a read-before-write if the transfer size
* does not cover the entire buffer.
- */
+ */
if (fs->fs_bsize > xfersize)
flags |= BA_CLRBUF;
else
@@ -740,8 +758,13 @@
if (size < xfersize)
xfersize = size;
- error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset,
- (int)xfersize, uio);
+ if ((bp->b_flags & B_UNMAPPED) == 0) {
+ error = vn_io_fault_uiomove((char *)bp->b_data +
+ blkoffset, (int)xfersize, uio);
+ } else {
+ error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
+ (int)xfersize, uio);
+ }
/*
* If the buffer is not already filled and we encounter an
* error while trying to fill it, we have to clear out any
@@ -784,7 +807,8 @@
} else if (xfersize + blkoffset == fs->fs_bsize) {
if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
bp->b_flags |= B_CLUSTEROK;
- cluster_write(vp, bp, ip->i_size, seqcount);
+ cluster_write(vp, bp, ip->i_size, seqcount,
+ GB_UNMAPPED);
} else {
bawrite(bp);
}
@@ -814,8 +838,7 @@
if (error) {
if (ioflag & IO_UNIT) {
(void)ffs_truncate(vp, osize,
- IO_NORMAL | (ioflag & IO_SYNC),
- ap->a_cred, uio->uio_td);
+ IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
@@ -844,7 +867,7 @@
* user programs might reference data beyond the actual end of file
 * occurring within the page. We have to zero that data.
*/
- VM_OBJECT_LOCK(mreq->object);
+ VM_OBJECT_WLOCK(mreq->object);
if (mreq->valid) {
if (mreq->valid != VM_PAGE_BITS_ALL)
vm_page_zero_invalid(mreq, TRUE);
@@ -855,10 +878,10 @@
vm_page_unlock(ap->a_m[i]);
}
}
- VM_OBJECT_UNLOCK(mreq->object);
+ VM_OBJECT_WUNLOCK(mreq->object);
return VM_PAGER_OK;
}
- VM_OBJECT_UNLOCK(mreq->object);
+ VM_OBJECT_WUNLOCK(mreq->object);
return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
ap->a_count,
@@ -942,7 +965,7 @@
* arguments point to arrays of the size specified in
* the 6th argument.
*/
- int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
+ u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
nextlbn = -1 - nextlbn;
error = breadn(vp, -1 - lbn,
@@ -1068,7 +1091,7 @@
/*
* We must perform a read-before-write if the transfer size
* does not cover the entire buffer.
- */
+ */
if (fs->fs_bsize > xfersize)
flags |= BA_CLRBUF;
else
@@ -1137,7 +1160,7 @@
if (error) {
if (ioflag & IO_UNIT) {
(void)ffs_truncate(vp, osize,
- IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
+ IO_EXT | (ioflag&IO_SYNC), ucred);
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
@@ -1204,7 +1227,8 @@
struct fs *fs;
struct uio luio;
struct iovec liovec;
- int easize, error;
+ u_int easize;
+ int error;
u_char *eae;
ip = VTOI(vp);
@@ -1328,7 +1352,7 @@
luio.uio_td = td;
/* XXX: I'm not happy about truncating to zero size */
if (ip->i_ea_len < dp->di_extsize)
- error = ffs_truncate(vp, 0, IO_EXT, cred, td);
+ error = ffs_truncate(vp, 0, IO_EXT, cred);
error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
}
if (--ip->i_ea_refs == 0) {
@@ -1384,12 +1408,7 @@
};
*/
{
- struct inode *ip;
- struct fs *fs;
- ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
-
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
@@ -1412,12 +1431,7 @@
};
*/
{
- struct inode *ip;
- struct fs *fs;
- ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
-
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
@@ -1530,13 +1544,11 @@
*/
{
struct inode *ip;
- struct fs *fs;
u_char *eae, *p;
unsigned easize;
int error, ealen;
ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
@@ -1585,7 +1597,6 @@
*/
{
struct inode *ip;
- struct fs *fs;
u_char *eae, *p, *pe, *pn;
unsigned easize;
uint32_t ul;
@@ -1592,7 +1603,6 @@
int error, ealen;
ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
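
The recurring pattern in the read and write hunks above is unmapped-buffer support: buffers are requested with GB_UNMAPPED (or BA_UNMAPPED through balloc), and the copy in or out of user space picks its primitive based on whether the buffer carries a kernel mapping. Condensed from the changes above:

    if ((bp->b_flags & B_UNMAPPED) == 0) {
        /* Buffer mapped into kernel VA: copy through b_data. */
        error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset,
            (int)xfersize, uio);
    } else {
        /* No mapping: copy page by page via the b_pages array. */
        error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
            (int)xfersize, uio);
    }

Skipping the transient kernel mapping is the point of the MNTK_UNMAPPED_BUFS flag set in ffs_mountfs() above.
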
Modified: trunk/sys/ufs/ffs/fs.h
===================================================================
--- trunk/sys/ufs/ffs/fs.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/fs.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -27,11 +28,11 @@
* SUCH DAMAGE.
*
* @(#)fs.h 8.13 (Berkeley) 3/21/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/fs.h 322860 2017-08-24 21:44:23Z mckusick $
*/
#ifndef _UFS_FFS_FS_H_
-#define _UFS_FFS_FS_H_
+#define _UFS_FFS_FS_H_
#include <sys/mount.h>
#include <ufs/ufs/dinode.h>
@@ -68,18 +69,18 @@
* given in byte-offset form, so they do not imply a sector size. The
* SBLOCKSEARCH specifies the order in which the locations should be searched.
*/
-#define SBLOCK_FLOPPY 0
-#define SBLOCK_UFS1 8192
-#define SBLOCK_UFS2 65536
-#define SBLOCK_PIGGY 262144
-#define SBLOCKSIZE 8192
-#define SBLOCKSEARCH \
+#define SBLOCK_FLOPPY 0
+#define SBLOCK_UFS1 8192
+#define SBLOCK_UFS2 65536
+#define SBLOCK_PIGGY 262144
+#define SBLOCKSIZE 8192
+#define SBLOCKSEARCH \
{ SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 }
/*
* Max number of fragments per block. This value is NOT tweakable.
*/
-#define MAXFRAG 8
+#define MAXFRAG 8
/*
* Addresses stored in inodes are capable of addressing fragments
@@ -109,7 +110,7 @@
* Note that super blocks are always of size SBLOCKSIZE,
* and that both SBLOCKSIZE and MAXBSIZE must be >= MINBSIZE.
*/
-#define MINBSIZE 4096
+#define MINBSIZE 4096
/*
* The path name on which the filesystem is mounted is maintained
@@ -116,13 +117,13 @@
* in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
* the super block for this name.
*/
-#define MAXMNTLEN 468
+#define MAXMNTLEN 468
/*
* The volume name for this filesystem is maintained in fs_volname.
* MAXVOLLEN defines the length of the buffer allocated.
*/
-#define MAXVOLLEN 32
+#define MAXVOLLEN 32
/*
* There is a 128-byte region in the superblock reserved for in-core
@@ -147,7 +148,7 @@
* value of fs_maxcontig. To conserve space, a maximum summary size
* is set by FS_MAXCONTIG.
*/
-#define FS_MAXCONTIG 16
+#define FS_MAXCONTIG 16
/*
* MINFREE gives the minimum acceptable percentage of filesystem
@@ -161,8 +162,8 @@
* default value. With 10% free space, fragmentation is not a
* problem, so we choose to optimize for time.
*/
-#define MINFREE 8
-#define DEFAULTOPT FS_OPTTIME
+#define MINFREE 8
+#define DEFAULTOPT FS_OPTTIME
/*
* Grigoriy Orlov <gluk at ptci.ru> has done some extensive work to fine
@@ -173,8 +174,8 @@
* filesystems, but may need to be tuned for odd cases like filesystems
* being used for squid caches or news spools.
*/
-#define AVFILESIZ 16384 /* expected average file size */
-#define AFPDIR 64 /* expected number of files per directory */
+#define AVFILESIZ 16384 /* expected average file size */
+#define AFPDIR 64 /* expected number of files per directory */
/*
* The maximum number of snapshot nodes that can be associated
@@ -184,7 +185,7 @@
* maintaining too many will slow the filesystem performance, so
* having this limit is a good idea.
*/
-#define FSMAXSNAP 20
+#define FSMAXSNAP 20
/*
* Used to identify special blocks in snapshots:
@@ -197,8 +198,8 @@
* identify blocks that are in use by other snapshots (which are
* expunged from this snapshot).
*/
-#define BLK_NOCOPY ((ufs2_daddr_t)(1))
-#define BLK_SNAP ((ufs2_daddr_t)(2))
+#define BLK_NOCOPY ((ufs2_daddr_t)(1))
+#define BLK_SNAP ((ufs2_daddr_t)(2))
/*
* Sysctl values for the fast filesystem.
@@ -214,7 +215,7 @@
#define FFS_ADJ_NIFREE 9 /* adjust number of free inodes */
#define FFS_ADJ_NFFREE 10 /* adjust number of free frags */
#define FFS_ADJ_NUMCLUSTERS 11 /* adjust number of free clusters */
-#define FFS_SET_CWD 12 /* set current directory */
+#define FFS_SET_CWD 12 /* set current directory */
#define FFS_SET_DOTDOT 13 /* set inode number for ".." */
#define FFS_UNLINK 14 /* remove a name in the filesystem */
#define FFS_SET_INODE 15 /* update an on-disk inode */
@@ -234,6 +235,20 @@
};
/*
+ * A recovery structure placed at the end of the boot block area by newfs
+ * that can be used by fsck to search for alternate superblocks.
+ */
+#define RESID (4096 - 20) /* disk sector size minus recovery area size */
+struct fsrecovery {
+ char block[RESID]; /* unused part of sector */
+ int32_t fsr_magic; /* magic number */
+ int32_t fsr_fsbtodb; /* fsbtodb and dbtofsb shift constant */
+ int32_t fsr_sblkno; /* offset of super-block in filesys */
+ int32_t fsr_fpg; /* blocks per group * fs_frag */
+ u_int32_t fsr_ncg; /* number of cylinder groups */
+};
+
+/*
* Per cylinder group information; summarized in blocks allocated
* from first cylinder group data blocks. These blocks have to be
* read in from fs_csaddr (size fs_cssize) in addition to the
@@ -343,7 +358,7 @@
ufs2_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */
int64_t fs_pendingblocks; /* (u) blocks being freed */
u_int32_t fs_pendinginodes; /* (u) inodes being freed */
- ino_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
+ uint32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
u_int32_t fs_avgfilesize; /* expected average file size */
u_int32_t fs_avgfpdir; /* expected # of files per directory */
int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */
@@ -377,14 +392,14 @@
#define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */
#define FS_BAD_MAGIC 0x19960408 /* UFS incomplete newfs magic number */
#define FS_OKAY 0x7c269d38 /* superblock checksum */
-#define FS_42INODEFMT -1 /* 4.2BSD inode format */
-#define FS_44INODEFMT 2 /* 4.4BSD inode format */
+#define FS_42INODEFMT -1 /* 4.2BSD inode format */
+#define FS_44INODEFMT 2 /* 4.4BSD inode format */
/*
* Preference for optimization.
*/
-#define FS_OPTTIME 0 /* minimize allocation time */
-#define FS_OPTSPACE 1 /* minimize disk fragmentation */
+#define FS_OPTTIME 0 /* minimize allocation time */
+#define FS_OPTSPACE 1 /* minimize disk fragmentation */
/*
* Filesystem flags.
@@ -414,16 +429,16 @@
* labels into extended attributes on the file system rather than maintain
* a single mount label for all objects.
*/
-#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */
-#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */
-#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */
+#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */
+#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */
+#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */
#define FS_SUJ 0x0008 /* Filesystem using softupdate journal */
-#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */
-#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */
-#define FS_GJOURNAL 0x0040 /* gjournaled file system */
-#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */
-#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */
-#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */
+#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */
+#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */
+#define FS_GJOURNAL 0x0040 /* gjournaled file system */
+#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */
+#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */
+#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */
#define FS_TRIM 0x0400 /* issue BIO_DELETE for deleted blocks */
/*
@@ -446,7 +461,7 @@
* Its size is derived from the size of the maps maintained in the
* cylinder group and the (struct cg) size.
*/
-#define CGSIZE(fs) \
+#define CGSIZE(fs) \
/* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \
/* old btotoff */ (fs)->fs_old_cpg * sizeof(int32_t) + \
/* old boff */ (fs)->fs_old_cpg * sizeof(u_int16_t) + \
@@ -459,12 +474,12 @@
/*
* The minimal number of cylinder groups that should be created.
*/
-#define MINCYLGRPS 4
+#define MINCYLGRPS 4
/*
* Convert cylinder group to base address of its global summary info.
*/
-#define fs_cs(fs, indx) fs_csp[indx]
+#define fs_cs(fs, indx) fs_csp[indx]
/*
* Cylinder group block for a filesystem.
@@ -504,14 +519,14 @@
/*
* Macros for access to cylinder group array structures
*/
-#define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC)
-#define cg_inosused(cgp) \
+#define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC)
+#define cg_inosused(cgp) \
((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff))
-#define cg_blksfree(cgp) \
+#define cg_blksfree(cgp) \
((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff))
-#define cg_clustersfree(cgp) \
+#define cg_clustersfree(cgp) \
((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff))
-#define cg_clustersum(cgp) \
+#define cg_clustersum(cgp) \
((int32_t *)((uintptr_t)(cgp) + (cgp)->cg_clustersumoff))
/*
@@ -532,7 +547,7 @@
#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */
#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */
#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */
-#define cgstart(fs, c) \
+#define cgstart(fs, c) \
((fs)->fs_magic == FS_UFS2_MAGIC ? cgbase(fs, c) : \
(cgbase(fs, c) + (fs)->fs_old_cgoffset * ((c) & ~((fs)->fs_old_cgmask))))
@@ -559,7 +574,7 @@
* Extract the bits for a block from a map.
* Compute the cylinder and rotational position of a cyl block addr.
*/
-#define blkmap(fs, map, loc) \
+#define blkmap(fs, map, loc) \
(((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag)))
/*
@@ -567,32 +582,32 @@
* quantities by using shifts and masks in place of divisions
* modulos and multiplications.
*/
-#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \
+#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \
((loc) & (fs)->fs_qbmask)
-#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \
+#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \
((loc) & (fs)->fs_qfmask)
-#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \
+#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \
(((off_t)(frag)) << (fs)->fs_fshift)
-#define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \
+#define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \
(((off_t)(blk)) << (fs)->fs_bshift)
/* Use this only when `blk' is known to be small, e.g., < NDADDR. */
-#define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \
+#define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \
((blk) << (fs)->fs_bshift)
-#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \
+#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \
((loc) >> (fs)->fs_bshift)
-#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \
+#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \
((loc) >> (fs)->fs_fshift)
-#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \
+#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \
(((size) + (fs)->fs_qbmask) & (fs)->fs_bmask)
-#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \
+#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \
(((size) + (fs)->fs_qfmask) & (fs)->fs_fmask)
-#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \
+#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \
((frags) >> (fs)->fs_fragshift)
-#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \
+#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \
((blks) << (fs)->fs_fragshift)
-#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \
+#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \
((fsb) & ((fs)->fs_frag - 1))
-#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \
+#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \
((fsb) &~ ((fs)->fs_frag - 1))
/*
@@ -599,7 +614,7 @@
* Determine the number of available frags given a
* percentage to hold in reserve.
*/
-#define freespace(fs, percentreserved) \
+#define freespace(fs, percentreserved) \
(blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \
(fs)->fs_cstotal.cs_nffree - \
(((off_t)((fs)->fs_dsize)) * (percentreserved) / 100))
@@ -607,11 +622,11 @@
/*
* Determining the size of a file block in the filesystem.
*/
-#define blksize(fs, ip, lbn) \
+#define blksize(fs, ip, lbn) \
(((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \
? (fs)->fs_bsize \
: (fragroundup(fs, blkoff(fs, (ip)->i_size))))
-#define sblksize(fs, size, lbn) \
+#define sblksize(fs, size, lbn) \
(((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \
? (fs)->fs_bsize \
: (fragroundup(fs, blkoff(fs, (size)))))
@@ -702,11 +717,11 @@
*/
struct jrefrec {
uint32_t jr_op;
- ino_t jr_ino;
- ino_t jr_parent;
+ uint32_t jr_ino;
+ uint32_t jr_parent;
uint16_t jr_nlink;
uint16_t jr_mode;
- off_t jr_diroff;
+ int64_t jr_diroff;
uint64_t jr_unused;
};
@@ -716,11 +731,11 @@
*/
struct jmvrec {
uint32_t jm_op;
- ino_t jm_ino;
- ino_t jm_parent;
+ uint32_t jm_ino;
+ uint32_t jm_parent;
uint16_t jm_unused;
- off_t jm_oldoff;
- off_t jm_newoff;
+ int64_t jm_oldoff;
+ int64_t jm_newoff;
};
/*
@@ -744,7 +759,7 @@
struct jtrncrec {
uint32_t jt_op;
uint32_t jt_ino;
- off_t jt_size;
+ int64_t jt_size;
uint32_t jt_extsize;
uint32_t jt_pad[3];
};
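
For reference, SBLOCKSEARCH above drives the usual superblock probe at mount time and in fsck: each candidate byte offset is tried in order until a plausible superblock turns up. A simplified sketch (read_super_at() is a hypothetical helper; the real loop also validates fs_bsize and fs_sblockloc before accepting a candidate):

    static const int sblock_try[] = SBLOCKSEARCH;
    struct fs *fs;
    int i;

    for (i = 0; sblock_try[i] != -1; i++) {
        /* Read SBLOCKSIZE bytes at byte offset sblock_try[i]. */
        fs = read_super_at(devvp, sblock_try[i]);  /* hypothetical */
        if (fs != NULL && (fs->fs_magic == FS_UFS2_MAGIC ||
            fs->fs_magic == FS_UFS1_MAGIC))
            break;          /* plausible superblock found */
    }

The new struct fsrecovery covers the case where all of these fail: newfs leaves just enough geometry at the end of the boot block area for fsck to compute where the alternate superblocks live.
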
Modified: trunk/sys/ufs/ffs/softdep.h
===================================================================
--- trunk/sys/ufs/ffs/softdep.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/softdep.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -36,7 +37,7 @@
* SUCH DAMAGE.
*
* @(#)softdep.h 9.7 (McKusick) 6/21/00
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/softdep.h 307534 2016-10-17 21:49:54Z mckusick $
*/
#include <sys/queue.h>
@@ -140,10 +141,44 @@
#define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */
#define UNLINKONLIST 0x200000 /* inodedep is in the unlinked list on disk */
#define UNLINKLINKS (UNLINKNEXT | UNLINKPREV)
+#define WRITESUCCEEDED 0x400000 /* the disk write completed successfully */
#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE)
/*
+ * Values for each of the soft dependency types.
+ */
+#define D_PAGEDEP 0
+#define D_INODEDEP 1
+#define D_BMSAFEMAP 2
+#define D_NEWBLK 3
+#define D_ALLOCDIRECT 4
+#define D_INDIRDEP 5
+#define D_ALLOCINDIR 6
+#define D_FREEFRAG 7
+#define D_FREEBLKS 8
+#define D_FREEFILE 9
+#define D_DIRADD 10
+#define D_MKDIR 11
+#define D_DIRREM 12
+#define D_NEWDIRBLK 13
+#define D_FREEWORK 14
+#define D_FREEDEP 15
+#define D_JADDREF 16
+#define D_JREMREF 17
+#define D_JMVREF 18
+#define D_JNEWBLK 19
+#define D_JFREEBLK 20
+#define D_JFREEFRAG 21
+#define D_JSEG 22
+#define D_JSEGDEP 23
+#define D_SBDEP 24
+#define D_JTRUNC 25
+#define D_JFSYNC 26
+#define D_SENTINEL 27
+#define D_LAST D_SENTINEL
+
+/*
* The workitem queue.
*
* It is sometimes useful and/or necessary to clean up certain dependencies
@@ -170,22 +205,22 @@
unsigned int wk_type:8, /* type of request */
wk_state:24; /* state flags */
};
-#define WK_DATA(wk) ((void *)(wk))
-#define WK_PAGEDEP(wk) ((struct pagedep *)(wk))
-#define WK_INODEDEP(wk) ((struct inodedep *)(wk))
-#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk))
+#define WK_DATA(wk) ((void *)(wk))
+#define WK_PAGEDEP(wk) ((struct pagedep *)(wk))
+#define WK_INODEDEP(wk) ((struct inodedep *)(wk))
+#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk))
#define WK_NEWBLK(wk) ((struct newblk *)(wk))
-#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk))
-#define WK_INDIRDEP(wk) ((struct indirdep *)(wk))
-#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk))
-#define WK_FREEFRAG(wk) ((struct freefrag *)(wk))
-#define WK_FREEBLKS(wk) ((struct freeblks *)(wk))
-#define WK_FREEWORK(wk) ((struct freework *)(wk))
-#define WK_FREEFILE(wk) ((struct freefile *)(wk))
-#define WK_DIRADD(wk) ((struct diradd *)(wk))
-#define WK_MKDIR(wk) ((struct mkdir *)(wk))
-#define WK_DIRREM(wk) ((struct dirrem *)(wk))
-#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk))
+#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk))
+#define WK_INDIRDEP(wk) ((struct indirdep *)(wk))
+#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk))
+#define WK_FREEFRAG(wk) ((struct freefrag *)(wk))
+#define WK_FREEBLKS(wk) ((struct freeblks *)(wk))
+#define WK_FREEWORK(wk) ((struct freework *)(wk))
+#define WK_FREEFILE(wk) ((struct freefile *)(wk))
+#define WK_DIRADD(wk) ((struct diradd *)(wk))
+#define WK_MKDIR(wk) ((struct mkdir *)(wk))
+#define WK_DIRREM(wk) ((struct dirrem *)(wk))
+#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk))
#define WK_JADDREF(wk) ((struct jaddref *)(wk))
#define WK_JREMREF(wk) ((struct jremref *)(wk))
#define WK_JMVREF(wk) ((struct jmvref *)(wk))
@@ -239,8 +274,8 @@
* list, any removed operations are done, and the dependency structure
* is freed.
*/
-#define DAHASHSZ 5
-#define DIRADDHASH(offset) (((offset) >> 2) % DAHASHSZ)
+#define DAHASHSZ 5
+#define DIRADDHASH(offset) (((offset) >> 2) % DAHASHSZ)
struct pagedep {
struct worklist pd_list; /* page buffer */
# define pd_state pd_list.wk_state /* check for multiple I/O starts */
@@ -330,8 +365,8 @@
struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */
} id_un;
};
-#define id_savedino1 id_un.idu_savedino1
-#define id_savedino2 id_un.idu_savedino2
+#define id_savedino1 id_un.idu_savedino1
+#define id_savedino2 id_un.idu_savedino2
/*
* A "bmsafemap" structure maintains a list of dependency structures
@@ -416,7 +451,8 @@
*/
struct allocdirect {
struct newblk ad_block; /* Common block logic */
-# define ad_state ad_block.nb_list.wk_state /* block pointer state */
+# define ad_list ad_block.nb_list /* block pointer worklist */
+# define ad_state ad_list.wk_state /* block pointer state */
TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */
struct inodedep *ad_inodedep; /* associated inodedep */
ufs2_daddr_t ad_oldblkno; /* old value of block pointer */
@@ -644,8 +680,8 @@
} da_un;
struct workhead da_jwork; /* Journal work awaiting completion. */
};
-#define da_previous da_un.dau_previous
-#define da_pagedep da_un.dau_pagedep
+#define da_previous da_un.dau_previous
+#define da_pagedep da_un.dau_pagedep
/*
* Two "mkdir" structures are needed to track the additional dependencies
@@ -701,8 +737,8 @@
} dm_un;
struct workhead dm_jwork; /* Journal work awaiting completion. */
};
-#define dm_pagedep dm_un.dmu_pagedep
-#define dm_dirinum dm_un.dmu_dirinum
+#define dm_pagedep dm_un.dmu_pagedep
+#define dm_dirinum dm_un.dmu_dirinum
/*
* A "newdirblk" structure tracks the progress of a newly allocated
@@ -947,3 +983,118 @@
struct fs *sb_fs; /* Filesystem pointer within buf. */
struct ufsmount *sb_ump; /* Our mount structure */
};
+
+/*
+ * Private journaling structures.
+ */
+struct jblocks {
+ struct jseglst jb_segs; /* TAILQ of current segments. */
+ struct jseg *jb_writeseg; /* Next write to complete. */
+ struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */
+ struct jextent *jb_extent; /* Extent array. */
+ uint64_t jb_nextseq; /* Next sequence number. */
+ uint64_t jb_oldestwrseq; /* Oldest written sequence number. */
+ uint8_t jb_needseg; /* Need a forced segment. */
+ uint8_t jb_suspended; /* Did journal suspend writes? */
+ int jb_avail; /* Available extents. */
+ int jb_used; /* Last used extent. */
+ int jb_head; /* Allocator head. */
+ int jb_off; /* Allocator extent offset. */
+ int jb_blocks; /* Total disk blocks covered. */
+ int jb_free; /* Total disk blocks free. */
+ int jb_min; /* Minimum free space. */
+ int jb_low; /* Low on space. */
+ int jb_age; /* Insertion time of oldest rec. */
+};
+
+struct jextent {
+ ufs2_daddr_t je_daddr; /* Disk block address. */
+ int je_blocks; /* Disk block count. */
+};
+
+/*
+ * Hash table declarations.
+ */
+LIST_HEAD(mkdirlist, mkdir);
+LIST_HEAD(pagedep_hashhead, pagedep);
+LIST_HEAD(inodedep_hashhead, inodedep);
+LIST_HEAD(newblk_hashhead, newblk);
+LIST_HEAD(bmsafemap_hashhead, bmsafemap);
+TAILQ_HEAD(indir_hashhead, freework);
+
+/*
+ * Per-filesystem soft dependency data.
+ * Allocated at mount and freed at unmount.
+ */
+struct mount_softdeps {
+ struct rwlock sd_fslock; /* softdep lock */
+ struct workhead sd_workitem_pending; /* softdep work queue */
+ struct worklist *sd_worklist_tail; /* Tail pointer for above */
+ struct workhead sd_journal_pending; /* journal work queue */
+ struct worklist *sd_journal_tail; /* Tail pointer for above */
+ struct jblocks *sd_jblocks; /* Journal block information */
+ struct inodedeplst sd_unlinked; /* Unlinked inodes */
+ struct bmsafemaphd sd_dirtycg; /* Dirty CGs */
+ struct mkdirlist sd_mkdirlisthd; /* Track mkdirs */
+ struct pagedep_hashhead *sd_pdhash; /* pagedep hash table */
+ u_long sd_pdhashsize; /* pagedep hash table size-1 */
+ long sd_pdnextclean; /* next hash bucket to clean */
+ struct inodedep_hashhead *sd_idhash; /* inodedep hash table */
+ u_long sd_idhashsize; /* inodedep hash table size-1 */
+ long sd_idnextclean; /* next hash bucket to clean */
+ struct newblk_hashhead *sd_newblkhash; /* newblk hash table */
+ u_long sd_newblkhashsize; /* newblk hash table size-1 */
+ struct bmsafemap_hashhead *sd_bmhash; /* bmsafemap hash table */
+ u_long sd_bmhashsize; /* bmsafemap hash table size-1*/
+ struct indir_hashhead *sd_indirhash; /* indir hash table */
+ u_long sd_indirhashsize; /* indir hash table size-1 */
+ int sd_on_journal; /* Items on the journal list */
+ int sd_on_worklist; /* Items on the worklist */
+ int sd_deps; /* Total dependency count */
+ int sd_accdeps; /* accumulated dep count */
+ int sd_req; /* Wakeup when deps hits 0. */
+ int sd_flags; /* comm with flushing thread */
+ int sd_cleanups; /* Calls to cleanup */
+ struct thread *sd_flushtd; /* thread handling flushing */
+ TAILQ_ENTRY(mount_softdeps) sd_next; /* List of softdep filesystem */
+ struct ufsmount *sd_ump; /* our ufsmount structure */
+ u_long sd_curdeps[D_LAST + 1]; /* count of current deps */
+};
+/*
+ * Flags for communicating with the syncer thread.
+ */
+#define FLUSH_EXIT 0x0001 /* time to exit */
+#define FLUSH_CLEANUP 0x0002 /* need to clear out softdep structures */
+#define FLUSH_STARTING 0x0004 /* flush thread not yet started */
+
+/*
+ * Keep the old names from when these were in the ufsmount structure.
+ */
+#define softdep_workitem_pending um_softdep->sd_workitem_pending
+#define softdep_worklist_tail um_softdep->sd_worklist_tail
+#define softdep_journal_pending um_softdep->sd_journal_pending
+#define softdep_journal_tail um_softdep->sd_journal_tail
+#define softdep_jblocks um_softdep->sd_jblocks
+#define softdep_unlinked um_softdep->sd_unlinked
+#define softdep_dirtycg um_softdep->sd_dirtycg
+#define softdep_mkdirlisthd um_softdep->sd_mkdirlisthd
+#define pagedep_hashtbl um_softdep->sd_pdhash
+#define pagedep_hash_size um_softdep->sd_pdhashsize
+#define pagedep_nextclean um_softdep->sd_pdnextclean
+#define inodedep_hashtbl um_softdep->sd_idhash
+#define inodedep_hash_size um_softdep->sd_idhashsize
+#define inodedep_nextclean um_softdep->sd_idnextclean
+#define newblk_hashtbl um_softdep->sd_newblkhash
+#define newblk_hash_size um_softdep->sd_newblkhashsize
+#define bmsafemap_hashtbl um_softdep->sd_bmhash
+#define bmsafemap_hash_size um_softdep->sd_bmhashsize
+#define indir_hashtbl um_softdep->sd_indirhash
+#define indir_hash_size um_softdep->sd_indirhashsize
+#define softdep_on_journal um_softdep->sd_on_journal
+#define softdep_on_worklist um_softdep->sd_on_worklist
+#define softdep_deps um_softdep->sd_deps
+#define softdep_accdeps um_softdep->sd_accdeps
+#define softdep_req um_softdep->sd_req
+#define softdep_flags um_softdep->sd_flags
+#define softdep_flushtd um_softdep->sd_flushtd
+#define softdep_curdeps um_softdep->sd_curdeps
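
The D_* indices and the sd_curdeps[D_LAST + 1] array above move the softdep accounting from global state to per-mount bookkeeping under sd_fslock. A sketch of how a work item's type feeds those counters, modeled on workitem_alloc() in ffs_softdep.c (simplified; locking omitted):

    static void
    workitem_alloc(struct worklist *item, int type, struct mount *mp)
    {
        struct ufsmount *ump;

        ump = VFSTOUFS(mp);
        item->wk_type = type;           /* one of the D_* values */
        item->wk_state = 0;
        ump->softdep_deps++;            /* sd_deps via the macros above */
        ump->softdep_curdeps[type]++;   /* per-type live count */
    }
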
Modified: trunk/sys/ufs/ufs/README.acls
===================================================================
--- trunk/sys/ufs/ufs/README.acls 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/README.acls 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,4 +1,4 @@
-$MidnightBSD$
+$FreeBSD: stable/10/sys/ufs/ufs/README.acls 105456 2002-10-19 16:09:16Z rwatson $
UFS Access Control Lists Copyright
Property changes on: trunk/sys/ufs/ufs/README.acls
___________________________________________________________________
Added: mnbsd:nokeywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/ufs/ufs/README.extattr
===================================================================
--- trunk/sys/ufs/ufs/README.extattr 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/README.extattr 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,4 +1,4 @@
-$MidnightBSD$
+$FreeBSD: stable/10/sys/ufs/ufs/README.extattr 105417 2002-10-18 21:11:36Z rwatson $
UFS Extended Attributes Copyright
Property changes on: trunk/sys/ufs/ufs/README.extattr
___________________________________________________________________
Added: mnbsd:nokeywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/ufs/ufs/acl.h
===================================================================
--- trunk/sys/ufs/ufs/acl.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/acl.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2001 Robert N. M. Watson
* All rights reserved.
@@ -25,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/acl.h 200796 2009-12-21 19:39:10Z trasz $
*/
/*
* Developed by the TrustedBSD Project.
Modified: trunk/sys/ufs/ufs/dinode.h
===================================================================
--- trunk/sys/ufs/ufs/dinode.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/dinode.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Networks Associates Technology, Inc.
* All rights reserved.
@@ -62,7 +63,7 @@
* SUCH DAMAGE.
*
* @(#)dinode.h 8.3 (Berkeley) 1/21/94
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/dinode.h 259223 2013-12-11 19:25:17Z pfg $
*/
#ifndef _UFS_UFS_DINODE_H_
@@ -138,15 +139,15 @@
int32_t di_atimensec; /* 68: Last access time. */
int32_t di_ctimensec; /* 72: Last inode change time. */
int32_t di_birthnsec; /* 76: Inode creation time. */
- int32_t di_gen; /* 80: Generation number. */
+ u_int32_t di_gen; /* 80: Generation number. */
u_int32_t di_kernflags; /* 84: Kernel flags. */
u_int32_t di_flags; /* 88: Status flags (chflags). */
- int32_t di_extsize; /* 92: External attributes block. */
+ u_int32_t di_extsize; /* 92: External attributes size. */
ufs2_daddr_t di_extb[NXADDR];/* 96: External attributes block. */
ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */
ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */
u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */
- ino_t di_freelink; /* 240: SUJ: Next unlinked inode. */
+ uint32_t di_freelink; /* 240: SUJ: Next unlinked inode. */
uint32_t di_spare[3]; /* 244: Reserved; currently unused */
};
@@ -168,7 +169,7 @@
struct ufs1_dinode {
u_int16_t di_mode; /* 0: IFMT, permissions; see below. */
int16_t di_nlink; /* 2: File link count. */
- ino_t di_freelink; /* 4: SUJ: Next unlinked inode. */
+ uint32_t di_freelink; /* 4: SUJ: Next unlinked inode. */
u_int64_t di_size; /* 8: File byte count. */
int32_t di_atime; /* 16: Last access time. */
int32_t di_atimensec; /* 20: Last access time. */
@@ -179,8 +180,8 @@
ufs1_daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */
ufs1_daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */
u_int32_t di_flags; /* 100: Status flags (chflags). */
- int32_t di_blocks; /* 104: Blocks actually held. */
- int32_t di_gen; /* 108: Generation number. */
+ u_int32_t di_blocks; /* 104: Blocks actually held. */
+ u_int32_t di_gen; /* 108: Generation number. */
u_int32_t di_uid; /* 112: File owner. */
u_int32_t di_gid; /* 116: File group. */
u_int64_t di_modrev; /* 120: i_modrev for NFSv4 */
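
The dinode edits above replace ino_t and signed fields with explicitly sized unsigned integers: both structures describe the on-disk format, so their layout must stay fixed even as kernel typedefs such as ino_t widen. The byte offsets in the comments pin the totals, which the kernel can verify at compile time; a minimal sketch of such an assertion:

    /* On-disk inode sizes are part of the format and must not drift. */
    CTASSERT(sizeof(struct ufs1_dinode) == 128);
    CTASSERT(sizeof(struct ufs2_dinode) == 256);
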
Modified: trunk/sys/ufs/ufs/dir.h
===================================================================
--- trunk/sys/ufs/ufs/dir.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/dir.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -32,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)dir.h 8.2 (Berkeley) 1/21/94
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/dir.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_DIR_H_
@@ -44,7 +45,7 @@
* quantity to keep down the cost of doing lookup on a 32-bit machine.
*/
#define doff_t int32_t
-#define MAXDIRSIZE (0x7fffffff)
+#define MAXDIRSIZE (0x7fffffff)
/*
* A directory consists of some number of blocks of DIRBLKSIZ
@@ -71,7 +72,7 @@
* Entries other than the first in a directory do not normally have
* dp->d_ino set to 0.
*/
-#define DIRBLKSIZ DEV_BSIZE
+#define DIRBLKSIZ DEV_BSIZE
#define MAXNAMLEN 255
struct direct {
@@ -113,14 +114,14 @@
(((uintptr_t)&((struct direct *)0)->d_name + \
((namlen)+1)*sizeof(((struct direct *)0)->d_name[0]) + 3) & ~3)
#if (BYTE_ORDER == LITTLE_ENDIAN)
-#define DIRSIZ(oldfmt, dp) \
+#define DIRSIZ(oldfmt, dp) \
((oldfmt) ? DIRECTSIZ((dp)->d_type) : DIRECTSIZ((dp)->d_namlen))
#else
-#define DIRSIZ(oldfmt, dp) \
+#define DIRSIZ(oldfmt, dp) \
DIRECTSIZ((dp)->d_namlen)
#endif
-#define OLDDIRFMT 1
-#define NEWDIRFMT 0
+#define OLDDIRFMT 1
+#define NEWDIRFMT 0
/*
* Template for manipulating directories. Should use struct direct's,
Modified: trunk/sys/ufs/ufs/dirhash.h
===================================================================
--- trunk/sys/ufs/ufs/dirhash.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/dirhash.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Ian Dowse. All rights reserved.
*
@@ -22,11 +23,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/dirhash.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_DIRHASH_H_
-#define _UFS_UFS_DIRHASH_H_
+#define _UFS_UFS_DIRHASH_H_
#include <sys/_lock.h>
#include <sys/_sx.h>
@@ -44,11 +45,11 @@
* We also maintain information about free space in each block
* to speed up creations.
*/
-#define DIRHASH_EMPTY (-1) /* entry unused */
-#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */
+#define DIRHASH_EMPTY (-1) /* entry unused */
+#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */
-#define DIRALIGN 4
-#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN)
+#define DIRALIGN 4
+#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN)
/* max DIRALIGN words in a directory entry */
/*
@@ -68,18 +69,18 @@
* case it limits the number of hash builds to 1/DH_SCOREINIT of the
* number of accesses.
*/
-#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */
-#define DH_SCOREMAX 64 /* max dh_score value */
+#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */
+#define DH_SCOREMAX 64 /* max dh_score value */
/*
* The main hash table has 2 levels. It is an array of pointers to
* blocks of DH_NBLKOFF offsets.
*/
-#define DH_BLKOFFSHIFT 8
-#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT)
-#define DH_BLKOFFMASK (DH_NBLKOFF - 1)
+#define DH_BLKOFFSHIFT 8
+#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT)
+#define DH_BLKOFFMASK (DH_NBLKOFF - 1)
-#define DH_ENTRY(dh, slot) \
+#define DH_ENTRY(dh, slot) \
((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK])
struct dirhash {
Modified: trunk/sys/ufs/ufs/extattr.h
===================================================================
--- trunk/sys/ufs/ufs/extattr.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/extattr.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2001 Robert N. M. Watson
* All rights reserved.
@@ -25,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/extattr.h 262779 2014-03-05 04:23:19Z pfg $
*/
/*
* Developed by the TrustedBSD Project.
@@ -94,14 +95,14 @@
* attribute name to calculate and set the ea_length, ea_namelength,
* and ea_contentpadlen fields of the extended attribute structure.
*/
-#define EXTATTR_NEXT(eap) \
+#define EXTATTR_NEXT(eap) \
((struct extattr *)(((void *)(eap)) + (eap)->ea_length))
-#define EXTATTR_CONTENT(eap) (((void *)(eap)) + EXTATTR_BASE_LENGTH(eap))
-#define EXTATTR_CONTENT_SIZE(eap) \
+#define EXTATTR_CONTENT(eap) (((void *)(eap)) + EXTATTR_BASE_LENGTH(eap))
+#define EXTATTR_CONTENT_SIZE(eap) \
((eap)->ea_length - EXTATTR_BASE_LENGTH(eap) - (eap)->ea_contentpadlen)
-#define EXTATTR_BASE_LENGTH(eap) \
+#define EXTATTR_BASE_LENGTH(eap) \
((sizeof(struct extattr) + (eap)->ea_namelength + 7) & ~7)
-#define EXTATTR_SET_LENGTHS(eap, contentsize) do { \
+#define EXTATTR_SET_LENGTHS(eap, contentsize) do { \
KASSERT(((eap)->ea_name[0] != 0), \
("Must initialize name before setting lengths")); \
(eap)->ea_namelength = strlen((eap)->ea_name); \
@@ -115,10 +116,6 @@
#include <sys/_sx.h>
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_EXTATTR);
-#endif
-
struct vnode;
LIST_HEAD(ufs_extattr_list_head, ufs_extattr_list_entry);
struct ufs_extattr_list_entry {
@@ -152,7 +149,7 @@
#else
/* User-level definition of KASSERT for macros above */
-#define KASSERT(cond, str) do { \
+#define KASSERT(cond, str) do { \
if (!(cond)) { printf("panic: "); printf(str); printf("\n"); exit(1); }\
} while (0)
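
The EXTATTR_* macros above define how the packed attribute records are traversed in an extended attribute area. A sketch of a lookup loop modeled on the kernel's use of these macros (start, end, attrnamespace, and name are assumed inputs; bounds checking simplified):

    struct extattr *eap;
    void *content;

    for (eap = (struct extattr *)start; eap < (struct extattr *)end;
        eap = EXTATTR_NEXT(eap)) {
            if (eap->ea_namespace != attrnamespace ||
                eap->ea_namelength != strlen(name) ||
                strncmp(eap->ea_name, name, eap->ea_namelength) != 0)
                    continue;
            content = EXTATTR_CONTENT(eap);
            /* ... copy out EXTATTR_CONTENT_SIZE(eap) bytes ... */
            break;
    }
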
Modified: trunk/sys/ufs/ufs/gjournal.h
===================================================================
--- trunk/sys/ufs/ufs/gjournal.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/gjournal.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
* All rights reserved.
@@ -23,11 +24,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/gjournal.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_GJOURNAL_H_
-#define _UFS_UFS_GJOURNAL_H_
+#define _UFS_UFS_GJOURNAL_H_
/*
* GEOM journal function prototypes.
Modified: trunk/sys/ufs/ufs/inode.h
===================================================================
--- trunk/sys/ufs/ufs/inode.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/inode.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -32,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)inode.h 8.9 (Berkeley) 5/14/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/inode.h 283640 2015-05-28 00:11:36Z mckusick $
*/
#ifndef _UFS_UFS_INODE_H_
@@ -87,6 +88,8 @@
daddr_t *snapblklist; /* Collect expunged snapshot blocks. */
} i_un;
+ int i_nextclustercg; /* last cg searched for cluster */
+
/*
* Data for extended attribute modification.
*/
@@ -102,7 +105,7 @@
int16_t i_nlink; /* File link count. */
u_int64_t i_size; /* File byte count. */
u_int32_t i_flags; /* Status flags (chflags). */
- int64_t i_gen; /* Generation number. */
+ u_int64_t i_gen; /* Generation number. */
u_int32_t i_uid; /* File owner. */
u_int32_t i_gid; /* File group. */
/*
@@ -129,12 +132,12 @@
#define IN_TRUNCATED 0x0800 /* Journaled truncation pending. */
-#define i_devvp i_ump->um_devvp
-#define i_umbufobj i_ump->um_bo
-#define i_dirhash i_un.dirhash
-#define i_snapblklist i_un.snapblklist
-#define i_din1 dinode_u.din1
-#define i_din2 dinode_u.din2
+#define i_devvp i_ump->um_devvp
+#define i_umbufobj i_ump->um_bo
+#define i_dirhash i_un.dirhash
+#define i_snapblklist i_un.snapblklist
+#define i_din1 dinode_u.din1
+#define i_din2 dinode_u.din2
#ifdef _KERNEL
/*
@@ -154,7 +157,7 @@
#define SHORTLINK(ip) \
(((ip)->i_ump->um_fstype == UFS1) ? \
(caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db)
-#define IS_SNAPSHOT(ip) ((ip)->i_flags & SF_SNAPSHOT)
+#define IS_SNAPSHOT(ip) ((ip)->i_flags & SF_SNAPSHOT)
/*
* Structure used to pass around logical block paths generated by
@@ -166,21 +169,21 @@
};
/* Convert between inode pointers and vnode pointers. */
-#define VTOI(vp) ((struct inode *)(vp)->v_data)
-#define ITOV(ip) ((ip)->i_vnode)
+#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
/* Determine if soft dependencies are being done */
-#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
-#define MOUNTEDSOFTDEP(mp) ((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
-#define DOINGSUJ(vp) ((vp)->v_mount->mnt_flag & MNT_SUJ)
-#define MOUNTEDSUJ(mp) ((mp)->mnt_flag & MNT_SUJ)
+#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
+#define MOUNTEDSOFTDEP(mp) ((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
+#define DOINGSUJ(vp) ((vp)->v_mount->mnt_flag & MNT_SUJ)
+#define MOUNTEDSUJ(mp) ((mp)->mnt_flag & MNT_SUJ)
/* This overlays the fid structure (see mount.h). */
struct ufid {
u_int16_t ufid_len; /* Length of structure. */
u_int16_t ufid_pad; /* Force 32-bit alignment. */
- ino_t ufid_ino; /* File number (ino). */
- int32_t ufid_gen; /* Generation number. */
+ uint32_t ufid_ino; /* File number (ino). */
+ uint32_t ufid_gen; /* Generation number. */
};
#endif /* _KERNEL */
Modified: trunk/sys/ufs/ufs/quota.h
===================================================================
--- trunk/sys/ufs/ufs/quota.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/quota.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)quota.h 8.3 (Berkeley) 8/19/94
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/quota.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_QUOTA_H_
@@ -62,7 +63,7 @@
/*
* Definitions for the default names of the quotas files.
*/
-#define INITQFNAMES { \
+#define INITQFNAMES { \
"user", /* USRQUOTA */ \
"group", /* GRPQUOTA */ \
"undefined", \
@@ -75,8 +76,8 @@
* broken into a main command defined below and a subcommand that is used
* to convey the type of quota that is being manipulated (see above).
*/
-#define SUBCMDMASK 0x00ff
-#define SUBCMDSHIFT 8
+#define SUBCMDMASK 0x00ff
+#define SUBCMDSHIFT 8
#define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK))
#define Q_QUOTAON 0x0100 /* enable quotas */
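Context for the SUBCMD* constants above: QCMD() packs the main command into the high bits and the quota type into the low byte, e.g. QCMD(Q_QUOTAON, USRQUOTA) == (0x0100 << 8) | 0 == 0x010000, and the kernel recovers the halves by shifting and masking. A quick round-trip check (constants copied from this header; USRQUOTA is 0 in quota.h):

    #include <stdio.h>

    #define SUBCMDMASK      0x00ff
    #define SUBCMDSHIFT     8
    #define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK))
    #define Q_QUOTAON       0x0100  /* enable quotas */
    #define USRQUOTA        0       /* user quota type */

    int
    main(void)
    {
            int cmds = QCMD(Q_QUOTAON, USRQUOTA);

            /* Unpack the same way the quotactl path does. */
            printf("packed 0x%x -> cmd 0x%x, type %d\n", cmds,
                cmds >> SUBCMDSHIFT, cmds & SUBCMDMASK);
            return (0);
    }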
@@ -119,10 +120,10 @@
int64_t dqb_itime; /* time limit for excessive files */
};
-#define dqblk dqblk64
+#define dqblk dqblk64
-#define Q_DQHDR64_MAGIC "QUOTA64"
-#define Q_DQHDR64_VERSION 0x20081104
+#define Q_DQHDR64_MAGIC "QUOTA64"
+#define Q_DQHDR64_VERSION 0x20081104
struct dqhdr64 {
char dqh_magic[8]; /* Q_DQHDR64_MAGIC */
Modified: trunk/sys/ufs/ufs/ufs_acl.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_acl.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_acl.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2003 Robert N. M. Watson
* All rights reserved.
@@ -31,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_acl.c 241011 2012-09-27 23:30:49Z mdf $");
#include "opt_ufs.h"
#include "opt_quota.h"
@@ -182,8 +183,8 @@
* are unsafe.
*/
printf("ufs_getacl_nfs4(): Loaded invalid ACL ("
- "%d bytes), inumber %d on %s\n", len,
- ip->i_number, ip->i_fs->fs_fsmnt);
+ "%d bytes), inumber %ju on %s\n", len,
+ (uintmax_t)ip->i_number, ip->i_fs->fs_fsmnt);
return (EPERM);
}
@@ -191,8 +192,8 @@
error = acl_nfs4_check(aclp, vp->v_type == VDIR);
if (error) {
printf("ufs_getacl_nfs4(): Loaded invalid ACL "
- "(failed acl_nfs4_check), inumber %d on %s\n",
- ip->i_number, ip->i_fs->fs_fsmnt);
+ "(failed acl_nfs4_check), inumber %ju on %s\n",
+ (uintmax_t)ip->i_number, ip->i_fs->fs_fsmnt);
return (EPERM);
}
@@ -259,8 +260,8 @@
* DAC protections are unsafe.
*/
printf("ufs_get_oldacl(): Loaded invalid ACL "
- "(len = %d), inumber %d on %s\n", len,
- ip->i_number, ip->i_fs->fs_fsmnt);
+ "(len = %d), inumber %ju on %s\n", len,
+ (uintmax_t)ip->i_number, ip->i_fs->fs_fsmnt);
return (EPERM);
}
Modified: trunk/sys/ufs/ufs/ufs_bmap.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_bmap.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_bmap.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_bmap.c 284021 2015-06-05 08:36:25Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -114,7 +115,6 @@
struct buf *bp;
struct ufsmount *ump;
struct mount *mp;
- struct vnode *devvp;
struct indir a[NIADDR+1], *ap;
ufs2_daddr_t daddr;
ufs_lbn_t metalbn;
@@ -125,7 +125,6 @@
ip = VTOI(vp);
mp = vp->v_mount;
ump = VFSTOUFS(mp);
- devvp = ump->um_devvp;
if (runp) {
maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
Modified: trunk/sys/ufs/ufs/ufs_dirhash.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_dirhash.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_dirhash.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001, 2002 Ian Dowse. All rights reserved.
*
@@ -28,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.4 2012/09/14 23:16:21 laffer1 Exp $");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_dirhash.c 326846 2017-12-14 11:45:02Z kib $");
#include "opt_ufs.h"
@@ -189,11 +190,11 @@
struct dirhash *ndh;
struct dirhash *dh;
struct vnode *vp;
- int error;
+ bool excl;
- error = 0;
ndh = dh = NULL;
vp = ip->i_vnode;
+ excl = false;
for (;;) {
/* Racy check for i_dirhash to prefetch a dirhash structure. */
if (ip->i_dirhash == NULL && ndh == NULL) {
@@ -230,8 +231,11 @@
ufsdirhash_hold(dh);
VI_UNLOCK(vp);
- /* Acquire a shared lock on existing hashes. */
- sx_slock(&dh->dh_lock);
+ /* Acquire a lock on existing hashes. */
+ if (excl)
+ sx_xlock(&dh->dh_lock);
+ else
+ sx_slock(&dh->dh_lock);
/* The hash could've been recycled while we were waiting. */
VI_LOCK(vp);
@@ -252,9 +256,10 @@
* so we can recreate it. If we fail the upgrade, drop our
* lock and try again.
*/
- if (sx_try_upgrade(&dh->dh_lock))
+ if (excl || sx_try_upgrade(&dh->dh_lock))
break;
sx_sunlock(&dh->dh_lock);
+ excl = true;
}
/* Free the preallocated structure if it was not necessary. */
if (ndh) {
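The excl flag introduced above is the interesting part of this hunk: the dirhash is taken shared for the common case, and when the code discovers it must rebuild the hash it attempts sx_try_upgrade(); if that fails it drops the lock and retries the whole loop, this time asking for the exclusive lock up front. A hedged pthread rendering of the same shape (the kernel uses sx(9) locks, which unlike pthreads offer a try-upgrade primitive):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_rwlock_t dh_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Returns with dh_lock held exclusively once modification is needed.
       pthreads cannot upgrade read -> write, so the kernel loop's
       fallback path (drop, then retry exclusive) is the only path here. */
    static void
    lock_for_rebuild(void)
    {
            bool excl = false;

            for (;;) {
                    if (excl)
                            pthread_rwlock_wrlock(&dh_lock);
                    else
                            pthread_rwlock_rdlock(&dh_lock);
                    if (excl)
                            return;
                    /* The shared fast path would run here; suppose we
                       now find the hash must be rebuilt. */
                    pthread_rwlock_unlock(&dh_lock);
                    excl = true;
            }
    }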
@@ -273,11 +278,9 @@
ufsdirhash_acquire(struct inode *ip)
{
struct dirhash *dh;
- struct vnode *vp;
ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);
- vp = ip->i_vnode;
dh = ip->i_dirhash;
if (dh == NULL)
return (NULL);
@@ -1248,7 +1251,12 @@
{
struct dirhash *dh, *dh_temp;
int memfreed = 0;
- /* XXX: this 10% may need to be adjusted */
+ /*
+ * Will free a *minimum* of 10% of the dirhash, but possibly much
+ * more (depending on dirhashreclaimage). Systems with large dirhashes
+ * probably also need a much larger dirhashreclaimage.
+ * XXX: this percentage may need to be adjusted.
+ */
int memwanted = ufs_dirhashmem / 10;
ufs_dirhashlowmemcount++;
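To make the reworded comment concrete: with ufs_dirhashmem at 2 MB, the floor freed per lowmem event is 2097152 / 10 = 209715 bytes; hashes unused for longer than dirhashreclaimage are reclaimed first, which can push the total well past that floor.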
Modified: trunk/sys/ufs/ufs/ufs_extattr.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_extattr.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_extattr.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2002 Robert N. M. Watson
* Copyright (c) 2002-2003 Networks Associates Technology, Inc.
@@ -38,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_extattr.c 302233 2016-06-27 21:44:27Z bdrewery $");
#include "opt_ufs.h"
@@ -69,6 +70,8 @@
#ifdef UFS_EXTATTR
+FEATURE(ufs_extattr, "ufs extended attribute support");
+
static MALLOC_DEFINE(M_UFS_EXTATTR, "ufs_extattr", "ufs extended attribute");
static int ufs_extattr_sync = 0;
@@ -108,7 +111,7 @@
* backing file anyway.
*/
static void
-ufs_extattr_uepm_lock(struct ufsmount *ump, struct thread *td)
+ufs_extattr_uepm_lock(struct ufsmount *ump)
{
sx_xlock(&ump->um_extattr.uepm_lock);
@@ -115,7 +118,7 @@
}
static void
-ufs_extattr_uepm_unlock(struct ufsmount *ump, struct thread *td)
+ufs_extattr_uepm_unlock(struct ufsmount *ump)
{
sx_xunlock(&ump->um_extattr.uepm_lock);
@@ -213,9 +216,9 @@
ump = VFSTOUFS(mp);
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_start_locked(ump, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -335,6 +338,8 @@
}
VOP_ADD_WRITECOUNT(vp, 1);
+ CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp,
+ vp->v_writecount);
vref(vp);
@@ -397,20 +402,8 @@
return (error);
}
- /*
- * XXXRW: While in UFS, we always get DIRBLKSIZ returns from
- * the directory code on success, on other file systems this
- * may not be the case. For portability, we should check the
- * read length on return from ufs_readdir().
- */
- edp = (struct dirent *)&dirbuf[DIRBLKSIZ];
+ edp = (struct dirent *)&dirbuf[DIRBLKSIZ - auio.uio_resid];
for (dp = (struct dirent *)dirbuf; dp < edp; ) {
-#if (BYTE_ORDER == LITTLE_ENDIAN)
- dp->d_type = dp->d_namlen;
- dp->d_namlen = 0;
-#else
- dp->d_type = 0;
-#endif
if (dp->d_reclen == 0)
break;
error = ufs_extattr_lookup(dvp, UE_GETDIR_LOCKPARENT,
@@ -456,9 +449,9 @@
int error;
ump = VFSTOUFS(mp);
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_autostart_locked(mp, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -564,7 +557,7 @@
struct ufsmount *ump = VFSTOUFS(mp);
int error = 0;
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
error = EOPNOTSUPP;
@@ -582,7 +575,7 @@
ump->um_extattr.uepm_ucred = NULL;
unlock:
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -607,8 +600,6 @@
attribute = malloc(sizeof(struct ufs_extattr_list_entry),
M_UFS_EXTATTR, M_WAITOK);
- if (attribute == NULL)
- return (ENOMEM);
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
error = EOPNOTSUPP;
@@ -781,10 +772,10 @@
* ufs_extattr_enable_with_open() will always unlock the
* vnode, regardless of failure.
*/
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_enable_with_open(ump, filename_vp,
attrnamespace, attrname, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
@@ -797,10 +788,10 @@
if (attrname == NULL)
return (EINVAL);
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_disable(ump, attrnamespace, attrname,
td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
@@ -830,12 +821,12 @@
struct ufsmount *ump = VFSTOUFS(mp);
int error;
- ufs_extattr_uepm_lock(ump, ap->a_td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_get(ap->a_vp, ap->a_attrnamespace, ap->a_name,
ap->a_uio, ap->a_size, ap->a_cred, ap->a_td);
- ufs_extattr_uepm_unlock(ump, ap->a_td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -932,8 +923,8 @@
* is to coerce this to undefined, and let it get cleaned
* up by the next write or extattrctl clean.
*/
- printf("ufs_extattr_get (%s): inode number inconsistency (%d, %jd)\n",
- mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (intmax_t)ip->i_gen);
+ printf("ufs_extattr_get (%s): inode number inconsistency (%d, %ju)\n",
+ mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (uintmax_t)ip->i_gen);
error = ENOATTR;
goto vopunlock_exit;
}
@@ -1000,13 +991,13 @@
struct ufsmount *ump = VFSTOUFS(mp);
int error;
- ufs_extattr_uepm_lock(ump, ap->a_td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_rm(ap->a_vp, ap->a_attrnamespace, ap->a_name,
ap->a_cred, ap->a_td);
- ufs_extattr_uepm_unlock(ump, ap->a_td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -1037,12 +1028,12 @@
if (ap->a_uio == NULL)
return (EINVAL);
- ufs_extattr_uepm_lock(ump, ap->a_td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_set(ap->a_vp, ap->a_attrnamespace, ap->a_name,
ap->a_uio, ap->a_cred, ap->a_td);
- ufs_extattr_uepm_unlock(ump, ap->a_td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -1293,10 +1284,10 @@
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED))
return;
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return;
}
@@ -1304,7 +1295,7 @@
ufs_extattr_rm(vp, uele->uele_attrnamespace,
uele->uele_attrname, NULL, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
}
#endif /* !UFS_EXTATTR */
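One hunk above deserves a second look: the directory scan previously assumed a full DIRBLKSIZ was always returned and patched up the d_type/d_namlen fields in place on little-endian machines. Now that ufs_readdir() returns fully formed entries (see the rewrite further below), the fixup is obsolete, and the end pointer is derived from how much the read actually filled, since uio_resid holds the bytes left unread. A small sketch of that pointer arithmetic, with a hypothetical resid value:

    #include <stdio.h>

    #define DIRBLKSIZ 512           /* directory block size */

    int
    main(void)
    {
            char dirbuf[DIRBLKSIZ];
            size_t resid = 100;     /* bytes the read left unfilled */
            char *edp = &dirbuf[DIRBLKSIZ - resid];

            /* Entries are valid only in dirbuf[0 .. DIRBLKSIZ - resid). */
            printf("valid bytes: %td\n", edp - dirbuf);     /* 412 */
            return (0);
    }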
Modified: trunk/sys/ufs/ufs/ufs_extern.h
===================================================================
--- trunk/sys/ufs/ufs/ufs_extern.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_extern.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -27,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/ufs_extern.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_EXTERN_H_
@@ -98,7 +99,6 @@
void softdep_setup_directory_change(struct buf *, struct inode *,
struct inode *, ino_t, int);
void softdep_change_linkcnt(struct inode *);
-void softdep_releasefile(struct inode *);
int softdep_slowdown(struct vnode *);
void softdep_setup_create(struct inode *, struct inode *);
void softdep_setup_dotdot_link(struct inode *, struct inode *);
@@ -107,7 +107,6 @@
void softdep_setup_rmdir(struct inode *, struct inode *);
void softdep_setup_unlink(struct inode *, struct inode *);
void softdep_revert_create(struct inode *, struct inode *);
-void softdep_revert_dotdot_link(struct inode *, struct inode *);
void softdep_revert_link(struct inode *, struct inode *);
void softdep_revert_mkdir(struct inode *, struct inode *);
void softdep_revert_rmdir(struct inode *, struct inode *);
@@ -119,10 +118,11 @@
* Note: The general vfs code typically limits the sequential heuristic
* count to 127. See sequential_heuristic() in kern/vfs_vnops.c
*/
-#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */
-#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
-#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */
-#define BA_SEQSHIFT 24
-#define BA_SEQMAX 0x7F
+#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */
+#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
+#define BA_UNMAPPED 0x00040000 /* Do not mmap resulted buffer. */
+#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */
+#define BA_SEQSHIFT 24
+#define BA_SEQMAX 0x7F
#endif /* !_UFS_UFS_EXTERN_H_ */
Modified: trunk/sys/ufs/ufs/ufs_gjournal.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_gjournal.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_gjournal.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_gjournal.c 306630 2016-10-03 10:15:16Z kib $");
#include "opt_ufs.h"
@@ -70,14 +71,17 @@
ino = ip->i_number;
cg = ino_to_cg(fs, ino);
- if (devvp->v_type != VCHR) {
+ if (devvp->v_type == VREG) {
/* devvp is a snapshot */
dev = VTOI(devvp)->i_devvp->v_rdev;
cgbno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
dev = devvp->v_rdev;
cgbno = fsbtodb(fs, cgtod(fs, cg));
+ } else {
+ bp = NULL;
+ return (EIO);
}
if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
panic("ufs_gjournal_modref: range: dev = %s, ino = %lu, fs = %s",
Modified: trunk/sys/ufs/ufs/ufs_inode.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_inode.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_inode.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1995
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_inode.c 234612 2012-04-23 17:54:49Z trasz $");
#include "opt_quota.h"
#include "opt_ufs.h"
@@ -73,7 +74,6 @@
{
struct vnode *vp = ap->a_vp;
struct inode *ip = VTOI(vp);
- struct thread *td = ap->a_td;
mode_t mode;
int error = 0;
off_t isize;
@@ -129,8 +129,7 @@
if (ip->i_ump->um_fstype == UFS2)
isize += ip->i_din2->di_extsize;
if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
- error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
- NOCRED, td);
+ error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL, NOCRED);
if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
#ifdef QUOTA
if (!getinoquota(ip))
@@ -137,7 +136,7 @@
(void)chkiq(ip, -1, NOCRED, FORCE);
#endif
#ifdef UFS_EXTATTR
- ufs_extattr_vnode_inactive(vp, td);
+ ufs_extattr_vnode_inactive(vp, ap->a_td);
#endif
/*
* Setting the mode to zero needs to wait for the inode
@@ -173,7 +172,7 @@
* so that it can be reused immediately.
*/
if (ip->i_mode == 0)
- vrecycle(vp, td);
+ vrecycle(vp);
if (mp != NULL)
vn_finished_secondary_write(mp);
return (error);
Modified: trunk/sys/ufs/ufs/ufs_lookup.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_lookup.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_lookup.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_lookup.c 306180 2016-09-22 10:51:47Z kib $");
#include "opt_ufs.h"
#include "opt_quota.h"
@@ -76,33 +77,7 @@
/* true if old FS format...*/
#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0)
-#ifdef QUOTA
static int
-ufs_lookup_upgrade_lock(struct vnode *vp)
-{
- int error;
-
- ASSERT_VOP_LOCKED(vp, __FUNCTION__);
- if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
- return (0);
-
- error = 0;
-
- /*
- * Upgrade vnode lock, since getinoquota()
- * requires exclusive lock to modify inode.
- */
- vhold(vp);
- vn_lock(vp, LK_UPGRADE | LK_RETRY);
- VI_LOCK(vp);
- if (vp->v_iflag & VI_DOOMED)
- error = ENOENT;
- vdropl(vp);
- return (error);
-}
-#endif
-
-static int
ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred,
struct thread *td)
{
@@ -259,12 +234,25 @@
vnode_create_vobject(vdp, DIP(dp, i_size), cnp->cn_thread);
bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
-#ifdef QUOTA
- if ((nameiop == DELETE || nameiop == RENAME) && (flags & ISLASTCN)) {
- error = ufs_lookup_upgrade_lock(vdp);
- if (error != 0)
- return (error);
- }
+
+#ifdef DEBUG_VFS_LOCKS
+ /*
+ * Assert that the directory vnode is locked, and locked
+ * exclusively for the last component lookup for modifying
+ * operations.
+ *
+ * The directory-modifying operations need to save
+ * intermediate state in the inode between namei() call and
+ * actual directory manipulations. See fields in the struct
+ * inode marked as 'used during directory lookup'. We must
+ * ensure that upgrade in namei() does not happen, since
+ * upgrade might need to unlock vdp. If quotas are enabled,
+ * getinoquota() also requires exclusive lock to modify inode.
+ */
+ ASSERT_VOP_LOCKED(vdp, "ufs_lookup1");
+ if ((nameiop == CREATE || nameiop == DELETE || nameiop == RENAME) &&
+ (flags & (LOCKPARENT | ISLASTCN)) == (LOCKPARENT | ISLASTCN))
+ ASSERT_VOP_ELOCKED(vdp, "ufs_lookup2");
#endif
restart:
@@ -550,7 +538,7 @@
/*
* Insert name into cache (as non-existent) if appropriate.
*/
- if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
+ if ((cnp->cn_flags & MAKEENTRY) != 0)
cache_enter(vdp, NULL, cnp);
return (ENOENT);
@@ -770,11 +758,13 @@
mp = ITOV(ip)->v_mount;
if ((mp->mnt_flag & MNT_RDONLY) == 0)
- panic("ufs_dirbad: %s: bad dir ino %lu at offset %ld: %s",
- mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how);
+ panic("ufs_dirbad: %s: bad dir ino %ju at offset %ld: %s",
+ mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
+ (long)offset, how);
else
- (void)printf("%s: bad dir ino %lu at offset %ld: %s\n",
- mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how);
+ (void)printf("%s: bad dir ino %ju at offset %ld: %s\n",
+ mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
+ (long)offset, how);
}
/*
@@ -879,6 +869,7 @@
struct buf *bp;
u_int dsize;
struct direct *ep, *nep;
+ u_int64_t old_isize;
int error, ret, blkoff, loc, spacefree, flags, namlen;
char *dirbuf;
@@ -907,16 +898,19 @@
return (error);
}
#endif
+ old_isize = dp->i_size;
+ vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ);
if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
cr, flags, &bp)) != 0) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
bdwrite(newdirbp);
+ vnode_pager_setsize(dvp, (u_long)old_isize);
return (error);
}
dp->i_size = dp->i_offset + DIRBLKSIZ;
DIP_SET(dp, i_size, dp->i_size);
+ dp->i_endoff = dp->i_size;
dp->i_flag |= IN_CHANGE | IN_UPDATE;
- vnode_pager_setsize(dvp, (u_long)dp->i_size);
dirp->d_reclen = DIRBLKSIZ;
blkoff = dp->i_offset &
(VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
@@ -1128,12 +1122,16 @@
dp->i_endoff && dp->i_endoff < dp->i_size) {
if (tvp != NULL)
VOP_UNLOCK(tvp, 0);
+ error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
+ IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr);
+ if (error != 0)
+ vn_printf(dvp, "ufs_direnter: failed to truncate "
+ "err %d", error);
#ifdef UFS_DIRHASH
- if (dp->i_dirhash != NULL)
+ if (error == 0 && dp->i_dirhash != NULL)
ufsdirhash_dirtrunc(dp, dp->i_endoff);
#endif
- (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
- IO_NORMAL | IO_SYNC, cr, td);
+ error = 0;
if (tvp != NULL)
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
}
@@ -1209,8 +1207,8 @@
ufsdirhash_remove(dp, rep, dp->i_offset);
#endif
if (ip && rep->d_ino != ip->i_number)
- panic("ufs_dirremove: ip %d does not match dirent ino %d\n",
- ip->i_number, rep->d_ino);
+ panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n",
+ (uintmax_t)ip->i_number, (uintmax_t)rep->d_ino);
if (dp->i_count == 0) {
/*
* First entry in block: set d_ino to zero.
@@ -1251,7 +1249,8 @@
* drop its snapshot reference so that it will be reclaimed
* when last open reference goes away.
*/
- if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_effnlink == 0)
+ if (ip != NULL && (ip->i_flags & SF_SNAPSHOT) != 0 &&
+ ip->i_effnlink == 0)
UFS_SNAPGONE(ip);
return (error);
}
Modified: trunk/sys/ufs/ufs/ufs_quota.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_quota.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_quota.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1990, 1993, 1995
* The Regents of the University of California. All rights reserved.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_quota.c 306178 2016-09-22 10:47:56Z kib $");
#include "opt_ffs.h"
@@ -307,7 +308,6 @@
chkiq(struct inode *ip, int change, struct ucred *cred, int flags)
{
struct dquot *dq;
- ino_t ncurinodes;
int i, error, warn, do_check;
#ifdef DIAGNOSTIC
@@ -322,10 +322,8 @@
continue;
DQI_LOCK(dq);
DQI_WAIT(dq, PINOD+1, "chkiq1");
- ncurinodes = dq->dq_curinodes + change;
- /* XXX: ncurinodes is unsigned */
- if (dq->dq_curinodes != 0 && ncurinodes >= 0)
- dq->dq_curinodes = ncurinodes;
+ if (dq->dq_curinodes >= -change)
+ dq->dq_curinodes += change;
else
dq->dq_curinodes = 0;
dq->dq_flags &= ~DQ_INODS;
@@ -359,11 +357,8 @@
continue;
DQI_LOCK(dq);
DQI_WAIT(dq, PINOD+1, "chkiq3");
- ncurinodes = dq->dq_curinodes - change;
- /* XXX: ncurinodes is unsigned */
- if (dq->dq_curinodes != 0 &&
- ncurinodes >= 0)
- dq->dq_curinodes = ncurinodes;
+ if (dq->dq_curinodes >= change)
+ dq->dq_curinodes -= change;
else
dq->dq_curinodes = 0;
dq->dq_flags &= ~DQ_INODS;
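Both chkiq() hunks fix the same latent bug: dq_curinodes is unsigned, so the old ncurinodes >= 0 test could never fail and an over-release wrapped the counter to a huge value. The new form compares magnitudes before mutating, so the counter saturates at zero. A stand-alone version of the corrected arithmetic (unsigned long stands in for the real field type):

    #include <stdio.h>

    /* Apply a signed delta to an unsigned counter, clamping at zero,
       mirroring the corrected chkiq() logic. */
    static unsigned long
    apply_delta(unsigned long cur, long change)
    {
            if (change >= 0)
                    return (cur + change);
            /* Compare against the magnitude first; an over-release
               saturates instead of wrapping. */
            if (cur >= (unsigned long)-change)
                    return (cur + change);  /* wraps back correctly */
            return (0);
    }

    int
    main(void)
    {
            printf("%lu\n", apply_delta(3, -5));    /* 0, not huge */
            return (0);
    }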
@@ -497,20 +492,24 @@
struct vnode *vp, **vpp;
struct vnode *mvp;
struct dquot *dq;
- int error, flags, vfslocked;
+ int error, flags;
struct nameidata nd;
error = priv_check(td, PRIV_UFS_QUOTAON);
- if (error)
+ if (error != 0) {
+ vfs_unbusy(mp);
return (error);
+ }
- if (mp->mnt_flag & MNT_RDONLY)
+ if ((mp->mnt_flag & MNT_RDONLY) != 0) {
+ vfs_unbusy(mp);
return (EROFS);
+ }
ump = VFSTOUFS(mp);
dq = NODQUOT;
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, fname, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td);
flags = FREAD | FWRITE;
vfs_ref(mp);
vfs_unbusy(mp);
@@ -519,7 +518,6 @@
vfs_rel(mp);
return (error);
}
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
error = vfs_busy(mp, MBF_NOWAIT);
@@ -533,7 +531,6 @@
if (error != 0) {
VOP_UNLOCK(vp, 0);
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
@@ -542,7 +539,6 @@
UFS_UNLOCK(ump);
VOP_UNLOCK(vp, 0);
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
vfs_unbusy(mp);
return (EALREADY);
}
@@ -554,7 +550,6 @@
ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING);
UFS_UNLOCK(ump);
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
vfs_unbusy(mp);
return (error);
}
@@ -567,11 +562,23 @@
if (*vpp != vp)
quotaoff1(td, mp, type);
+ /*
+ * When the directory vnode containing the quota file is
+ * inactivated, due to the shared lookup of the quota file
+ * vput()ing the dvp, the qsyncvp() call for the containing
+ * directory would try to acquire the quota lock exclusive.
+ * At the same time, lookup already locked the quota vnode
+ * shared. Mark the quota vnode lock as allowing recursion
+ * and automatically converting shared locks to exclusive.
+ *
+ * Also mark quota vnode as system.
+ */
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vp->v_vflag |= VV_SYSTEM;
+ VN_LOCK_AREC(vp);
+ VN_LOCK_DSHARE(vp);
VOP_UNLOCK(vp, 0);
*vpp = vp;
- VFS_UNLOCK_GIANT(vfslocked);
/*
* Save the credential of the process that turned on quotas.
* Set up the time limits for this quota.
@@ -643,7 +650,6 @@
struct dquot *dq;
struct inode *ip;
struct ucred *cr;
- int vfslocked;
int error;
ump = VFSTOUFS(mp);
@@ -693,12 +699,10 @@
ump->um_cred[type] = NOCRED;
UFS_UNLOCK(ump);
- vfslocked = VFS_LOCK_GIANT(qvp->v_mount);
vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY);
qvp->v_vflag &= ~VV_SYSTEM;
VOP_UNLOCK(qvp, 0);
error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
crfree(cr);
return (error);
@@ -1250,7 +1254,7 @@
struct vnode *dqvp;
struct iovec aiov;
struct uio auio;
- int vfslocked, dqvplocked, error;
+ int dqvplocked, error;
#ifdef DEBUG_VFS_LOCKS
if (vp != NULLVP)
@@ -1296,12 +1300,10 @@
error = EIO;
}
*dqp = dq;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
if (dqvplocked)
vput(dqvp);
else
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
@@ -1354,12 +1356,10 @@
DQH_UNLOCK();
tablefull("dquot");
*dqp = NODQUOT;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
if (dqvplocked)
vput(dqvp);
else
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
return (EUSERS);
}
if (dq->dq_cnt || (dq->dq_flags & DQ_MOD))
@@ -1402,7 +1402,6 @@
auio.uio_rw = UIO_READ;
auio.uio_td = (struct thread *)0;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]);
if (auio.uio_resid == recsize && error == 0) {
bzero(&dq->dq_dqb, sizeof(dq->dq_dqb));
@@ -1416,7 +1415,6 @@
vput(dqvp);
else
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
/*
* I/O error in reading quota file, release
* quota structure and reflect problem to caller.
@@ -1529,7 +1527,7 @@
struct vnode *dqvp;
struct iovec aiov;
struct uio auio;
- int vfslocked, error;
+ int error;
struct mount *mp;
struct ufsmount *ump;
@@ -1545,17 +1543,20 @@
if ((ump = dq->dq_ump) == NULL)
return (0);
UFS_LOCK(ump);
- if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP)
- panic("dqsync: file");
+ if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP) {
+ if (vp == NULL) {
+ UFS_UNLOCK(ump);
+ return (0);
+ } else
+ panic("dqsync: file");
+ }
vref(dqvp);
UFS_UNLOCK(ump);
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
DQI_LOCK(dq);
if ((dq->dq_flags & DQ_MOD) == 0) {
DQI_UNLOCK(dq);
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
return (0);
}
DQI_UNLOCK(dq);
@@ -1564,7 +1565,6 @@
if (vp != dqvp)
vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY);
- VFS_UNLOCK_GIANT(vfslocked);
DQI_LOCK(dq);
DQI_WAIT(dq, PINOD+2, "dqsync");
if ((dq->dq_flags & DQ_MOD) == 0)
@@ -1595,9 +1595,7 @@
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_WRITE;
auio.uio_td = (struct thread *)0;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]);
- VFS_UNLOCK_GIANT(vfslocked);
if (auio.uio_resid && error == 0)
error = EIO;
@@ -1606,13 +1604,11 @@
dq->dq_flags &= ~DQ_MOD;
out:
DQI_UNLOCK(dq);
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
if (vp != dqvp)
vput(dqvp);
else
vrele(dqvp);
vn_finished_secondary_write(mp);
- VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
Modified: trunk/sys/ufs/ufs/ufs_vfsops.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_vfsops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_vfsops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_vfsops.c 278150 2015-02-03 11:54:33Z kib $");
#include "opt_quota.h"
#include "opt_ufs.h"
@@ -92,6 +93,9 @@
void *arg;
{
#ifndef QUOTA
+ if ((cmds >> SUBCMDSHIFT) == Q_QUOTAON)
+ vfs_unbusy(mp);
+
return (EOPNOTSUPP);
#else
struct thread *td;
@@ -112,11 +116,16 @@
break;
default:
+ if (cmd == Q_QUOTAON)
+ vfs_unbusy(mp);
return (EINVAL);
}
}
- if ((u_int)type >= MAXQUOTAS)
+ if ((u_int)type >= MAXQUOTAS) {
+ if (cmd == Q_QUOTAON)
+ vfs_unbusy(mp);
return (EINVAL);
+ }
switch (cmd) {
case Q_QUOTAON:
Modified: trunk/sys/ufs/ufs/ufs_vnops.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_vnops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_vnops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993, 1995
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_vnops.c 332750 2018-04-19 02:50:15Z pfg $");
#include "opt_quota.h"
#include "opt_suiddir.h"
@@ -69,8 +70,6 @@
#include <vm/vm.h>
#include <vm/vm_extern.h>
-#include <fs/fifofs/fifo.h>
-
#include <ufs/ufs/acl.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
@@ -107,7 +106,7 @@
static vop_getattr_t ufs_getattr;
static vop_ioctl_t ufs_ioctl;
static vop_link_t ufs_link;
-static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
+static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *);
static vop_markatime_t ufs_markatime;
static vop_mkdir_t ufs_mkdir;
static vop_mknod_t ufs_mknod;
@@ -206,9 +205,11 @@
error =
ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
- ap->a_dvp, ap->a_vpp, ap->a_cnp);
- if (error)
+ ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create");
+ if (error != 0)
return (error);
+ if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
+ cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
return (0);
}
@@ -232,7 +233,7 @@
int error;
error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
- ap->a_dvp, vpp, ap->a_cnp);
+ ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod");
if (error)
return (error);
ip = VTOI(*vpp);
@@ -530,9 +531,11 @@
return (EINVAL);
}
if (vap->va_flags != VNOVAL) {
- if ((vap->va_flags & ~(UF_NODUMP | UF_IMMUTABLE | UF_APPEND |
- UF_OPAQUE | UF_NOUNLINK | SF_ARCHIVED | SF_IMMUTABLE |
- SF_APPEND | SF_NOUNLINK | SF_SNAPSHOT)) != 0)
+ if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE |
+ SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE |
+ UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK |
+ UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
+ UF_SPARSE | UF_SYSTEM)) != 0)
return (EOPNOTSUPP);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
return (EROFS);
@@ -559,23 +562,17 @@
if (error)
return (error);
}
- /* Snapshot flag cannot be set or cleared */
- if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
- (ip->i_flags & SF_SNAPSHOT) == 0) ||
- ((vap->va_flags & SF_SNAPSHOT) == 0 &&
- (ip->i_flags & SF_SNAPSHOT) != 0))
+ /* The snapshot flag cannot be toggled. */
+ if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT)
return (EPERM);
- ip->i_flags = vap->va_flags;
- DIP_SET(ip, i_flags, vap->va_flags);
} else {
if (ip->i_flags &
(SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
- (vap->va_flags & UF_SETTABLE) != vap->va_flags)
+ ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
return (EPERM);
- ip->i_flags &= SF_SETTABLE;
- ip->i_flags |= (vap->va_flags & UF_SETTABLE);
- DIP_SET(ip, i_flags, ip->i_flags);
}
+ ip->i_flags = vap->va_flags;
+ DIP_SET(ip, i_flags, vap->va_flags);
ip->i_flag |= IN_CHANGE;
error = UFS_UPDATE(vp, 0);
if (ip->i_flags & (IMMUTABLE | APPEND))
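The rewritten flag checks above use an XOR idiom worth spelling out: a bit differs between the requested and current flag words exactly when it is set in their XOR, so one test replaces the old four-way comparison for SF_SNAPSHOT, and the same trick guards the SF_SETTABLE bits for unprivileged callers. A tiny demonstration (SF_SNAPSHOT value as defined in FreeBSD's sys/stat.h):

    #include <stdio.h>

    #define SF_SNAPSHOT 0x00200000  /* snapshot inode (sys/stat.h) */

    int
    main(void)
    {
            unsigned int cur = SF_SNAPSHOT; /* flag currently set */
            unsigned int req = 0;           /* caller asks to clear it */

            /* Nonzero iff the request would toggle the snapshot bit. */
            if ((req ^ cur) & SF_SNAPSHOT)
                    printf("EPERM: snapshot flag cannot be toggled\n");
            return (0);
    }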
@@ -630,8 +627,9 @@
*/
return (0);
}
- if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
- cred, td)) != 0)
+ if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL |
+ ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0),
+ cred)) != 0)
return (error);
}
if (vap->va_atime.tv_sec != VNOVAL ||
@@ -641,49 +639,17 @@
return (EROFS);
if ((ip->i_flags & SF_SNAPSHOT) != 0)
return (EPERM);
- /*
- * From utimes(2):
- * If times is NULL, ... The caller must be the owner of
- * the file, have permission to write the file, or be the
- * super-user.
- * If times is non-NULL, ... The caller must be the owner of
- * the file or be the super-user.
- *
- * Possibly for historical reasons, try to use VADMIN in
- * preference to VWRITE for a NULL timestamp. This means we
- * will return EACCES in preference to EPERM if neither
- * check succeeds.
- */
- if (vap->va_vaflags & VA_UTIMES_NULL) {
- /*
- * NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes
- *
- * "A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
- * will be allowed to set the times [..] to the current
- * server time."
- *
- * XXX: Calling it four times seems a little excessive.
- */
- error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
- if (error)
- error = VOP_ACCESS(vp, VWRITE, cred, td);
- } else
- error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
- if (error)
+ error = vn_utimes_perm(vp, vap, cred, td);
+ if (error != 0)
return (error);
- if (vap->va_atime.tv_sec != VNOVAL)
- ip->i_flag |= IN_ACCESS;
- if (vap->va_mtime.tv_sec != VNOVAL)
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
- if (vap->va_birthtime.tv_sec != VNOVAL &&
- ip->i_ump->um_fstype == UFS2)
- ip->i_flag |= IN_MODIFIED;
- ufs_itimes(vp);
+ ip->i_flag |= IN_CHANGE | IN_MODIFIED;
if (vap->va_atime.tv_sec != VNOVAL) {
+ ip->i_flag &= ~IN_ACCESS;
DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
}
if (vap->va_mtime.tv_sec != VNOVAL) {
+ ip->i_flag &= ~IN_UPDATE;
DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
}
@@ -979,6 +945,17 @@
return (error);
}
+static void
+print_bad_link_count(const char *funcname, struct vnode *dvp)
+{
+ struct inode *dip;
+
+ dip = VTOI(dvp);
+ uprintf("%s: Bad link count %d on parent inode %d in file system %s\n",
+ funcname, dip->i_effnlink, dip->i_number,
+ dvp->v_mount->mnt_stat.f_mntonname);
+}
+
/*
* link vnode call
*/
@@ -1001,13 +978,11 @@
if ((cnp->cn_flags & HASBUF) == 0)
panic("ufs_link: no name");
#endif
- if (tdvp->v_mount != vp->v_mount) {
- error = EXDEV;
+ if (VTOI(tdvp)->i_effnlink < 2) {
+ print_bad_link_count("ufs_link", tdvp);
+ error = EINVAL;
goto out;
}
- if (VTOI(tdvp)->i_effnlink < 2)
- panic("ufs_link: Bad link count %d on parent",
- VTOI(tdvp)->i_effnlink);
ip = VTOI(vp);
if ((nlink_t)ip->i_nlink >= LINK_MAX) {
error = EMLINK;
@@ -1179,11 +1154,6 @@
mp = NULL;
goto releout;
}
- error = vfs_busy(mp, 0);
- if (error) {
- mp = NULL;
- goto releout;
- }
relock:
/*
* We need to acquire 2 to 4 locks depending on whether tvp is NULL
@@ -1271,7 +1241,7 @@
error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
if (error != 0)
goto releout;
- VOP_UNLOCK(nvp, 0);
+ vput(nvp);
atomic_add_int(&rename_restarts, 1);
goto relock;
}
@@ -1512,8 +1482,8 @@
if (error)
panic("ufs_rename: from entry went away!");
if (ino != fip->i_number)
- panic("ufs_rename: ino mismatch %d != %d\n", ino,
- fip->i_number);
+ panic("ufs_rename: ino mismatch %ju != %ju\n",
+ (uintmax_t)ino, (uintmax_t)fip->i_number);
}
/*
* If the source is a directory with a
@@ -1574,18 +1544,25 @@
* are no longer needed.
*/
if (error == 0 && endoff != 0) {
+ error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC,
+ tcnp->cn_cred);
+ if (error != 0)
+ vn_printf(tdvp, "ufs_rename: failed to truncate "
+ "err %d", error);
#ifdef UFS_DIRHASH
- if (tdp->i_dirhash != NULL)
+ else if (tdp->i_dirhash != NULL)
ufsdirhash_dirtrunc(tdp, endoff);
#endif
- UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
- td);
+ /*
+ * Even if the directory compaction failed, rename was
+ * successful. Do not propagate a UFS_TRUNCATE() error
+ * to the caller.
+ */
+ error = 0;
}
if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
error = VOP_FSYNC(tdvp, MNT_WAIT, td);
vput(tdvp);
- if (mp)
- vfs_unbusy(mp);
return (error);
bad:
@@ -1603,8 +1580,6 @@
vrele(tdvp);
if (tvp)
vrele(tvp);
- if (mp)
- vfs_unbusy(mp);
return (error);
}
@@ -1751,10 +1726,10 @@
* XXX: This should not happen, as EOPNOTSUPP above was
* supposed to free acl.
*/
- printf("ufs_makeinode: VOP_GETACL() but no "
- "VOP_SETACL()\n");
- /* panic("ufs_makeinode: VOP_GETACL() but no "
- "VOP_SETACL()"); */
+ printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
+ "but no VOP_SETACL()\n");
+ /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
+ "but no VOP_SETACL()"); */
break;
default:
@@ -1832,6 +1807,11 @@
* but not have it entered in the parent directory. The entry is
* made later after writing "." and ".." entries.
*/
+ if (dp->i_effnlink < 2) {
+ print_bad_link_count("ufs_mkdir", dvp);
+ error = EINVAL;
+ goto out;
+ }
error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
if (error)
goto out;
@@ -1963,6 +1943,7 @@
dirtemplate = *dtp;
dirtemplate.dot_ino = ip->i_number;
dirtemplate.dotdot_ino = dp->i_number;
+ vnode_pager_setsize(tvp, DIRBLKSIZ);
if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
BA_CLRBUF, &bp)) != 0)
goto bad;
@@ -1969,7 +1950,6 @@
ip->i_size = DIRBLKSIZ;
DIP_SET(ip, i_size, DIRBLKSIZ);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- vnode_pager_setsize(tvp, (u_long)ip->i_size);
bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
if (DOINGSOFTDEP(tvp)) {
/*
@@ -2062,13 +2042,12 @@
* tries to remove a locally mounted on directory).
*/
error = 0;
- if (ip->i_effnlink < 2) {
+ if (dp->i_effnlink <= 2) {
+ if (dp->i_effnlink == 2)
+ print_bad_link_count("ufs_rmdir", dvp);
error = EINVAL;
goto out;
}
- if (dp->i_effnlink < 3)
- panic("ufs_dirrem: Bad link count %d on parent",
- dp->i_effnlink);
if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
error = ENOTEMPTY;
goto out;
@@ -2147,7 +2126,7 @@
int len, error;
error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
- vpp, ap->a_cnp);
+ vpp, ap->a_cnp, "ufs_symlink");
if (error)
return (error);
vp = *vpp;
@@ -2170,12 +2149,6 @@
/*
* Vnode op for reading directories.
- *
- * The routine below assumes that the on-disk format of a directory
- * is the same as that defined by <sys/dirent.h>. If the on-disk
- * format changes, then it will be necessary to do a conversion
- * from the on-disk format that read returns to the format defined
- * by <sys/dirent.h>.
*/
int
ufs_readdir(ap)
@@ -2188,103 +2161,126 @@
u_long **a_cookies;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
struct uio *uio = ap->a_uio;
+ struct buf *bp;
struct inode *ip;
+ struct direct *dp, *edp;
+ u_long *cookies;
+ struct dirent dstdp;
+ off_t offset, startoffset;
+ size_t readcnt, skipcnt;
+ ssize_t startresid;
+ int ncookies;
int error;
- size_t count, lost;
- off_t off;
- if (ap->a_ncookies != NULL)
- /*
- * Ensure that the block is aligned. The caller can use
- * the cookies to determine where in the block to start.
- */
- uio->uio_offset &= ~(DIRBLKSIZ - 1);
- ip = VTOI(ap->a_vp);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ ip = VTOI(vp);
if (ip->i_effnlink == 0)
return (0);
- off = uio->uio_offset;
- count = uio->uio_resid;
- /* Make sure we don't return partial entries. */
- if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
- return (EINVAL);
- count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
- lost = uio->uio_resid - count;
- uio->uio_resid = count;
- uio->uio_iov->iov_len = count;
-# if (BYTE_ORDER == LITTLE_ENDIAN)
- if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
- error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
- } else {
- struct dirent *dp, *edp;
- struct uio auio;
- struct iovec aiov;
- caddr_t dirbuf;
- int readcnt;
- u_char tmp;
-
- auio = *uio;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_segflg = UIO_SYSSPACE;
- aiov.iov_len = count;
- dirbuf = malloc(count, M_TEMP, M_WAITOK);
- aiov.iov_base = dirbuf;
- error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
- if (error == 0) {
- readcnt = count - auio.uio_resid;
- edp = (struct dirent *)&dirbuf[readcnt];
- for (dp = (struct dirent *)dirbuf; dp < edp; ) {
- tmp = dp->d_namlen;
- dp->d_namlen = dp->d_type;
- dp->d_type = tmp;
- if (dp->d_reclen > 0) {
- dp = (struct dirent *)
- ((char *)dp + dp->d_reclen);
- } else {
- error = EIO;
- break;
- }
- }
- if (dp >= edp)
- error = uiomove(dirbuf, readcnt, uio);
+ if (ap->a_ncookies != NULL) {
+ if (uio->uio_resid < 0)
+ ncookies = 0;
+ else
+ ncookies = uio->uio_resid;
+ if (uio->uio_offset >= ip->i_size)
+ ncookies = 0;
+ else if (ip->i_size - uio->uio_offset < ncookies)
+ ncookies = ip->i_size - uio->uio_offset;
+ ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1;
+ cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
+ *ap->a_ncookies = ncookies;
+ *ap->a_cookies = cookies;
+ } else {
+ ncookies = 0;
+ cookies = NULL;
+ }
+ offset = startoffset = uio->uio_offset;
+ startresid = uio->uio_resid;
+ error = 0;
+ while (error == 0 && uio->uio_resid > 0 &&
+ uio->uio_offset < ip->i_size) {
+ error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp);
+ if (error)
+ break;
+ if (bp->b_offset + bp->b_bcount > ip->i_size)
+ readcnt = ip->i_size - bp->b_offset;
+ else
+ readcnt = bp->b_bcount;
+ skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
+ ~(size_t)(DIRBLKSIZ - 1);
+ offset = bp->b_offset + skipcnt;
+ dp = (struct direct *)&bp->b_data[skipcnt];
+ edp = (struct direct *)&bp->b_data[readcnt];
+ while (error == 0 && uio->uio_resid > 0 && dp < edp) {
+ if (dp->d_reclen <= offsetof(struct direct, d_name) ||
+ (caddr_t)dp + dp->d_reclen > (caddr_t)edp) {
+ error = EIO;
+ break;
}
- free(dirbuf, M_TEMP);
+#if BYTE_ORDER == LITTLE_ENDIAN
+ /* Old filesystem format. */
+ if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+ dstdp.d_namlen = dp->d_type;
+ dstdp.d_type = dp->d_namlen;
+ } else
+#endif
+ {
+ dstdp.d_namlen = dp->d_namlen;
+ dstdp.d_type = dp->d_type;
+ }
+ if (offsetof(struct direct, d_name) + dstdp.d_namlen >
+ dp->d_reclen) {
+ error = EIO;
+ break;
+ }
+ if (offset < startoffset || dp->d_ino == 0)
+ goto nextentry;
+ dstdp.d_fileno = dp->d_ino;
+ dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
+ bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
+ dstdp.d_name[dstdp.d_namlen] = '\0';
+ if (dstdp.d_reclen > uio->uio_resid) {
+ if (uio->uio_resid == startresid)
+ error = EINVAL;
+ else
+ error = EJUSTRETURN;
+ break;
+ }
+ /* Advance dp. */
+ error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
+ if (error)
+ break;
+ if (cookies != NULL) {
+ KASSERT(ncookies > 0,
+ ("ufs_readdir: cookies buffer too small"));
+ *cookies = offset + dp->d_reclen;
+ cookies++;
+ ncookies--;
+ }
+nextentry:
+ offset += dp->d_reclen;
+ dp = (struct direct *)((caddr_t)dp + dp->d_reclen);
}
-# else
- error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
-# endif
- if (!error && ap->a_ncookies != NULL) {
- struct dirent* dpStart;
- struct dirent* dpEnd;
- struct dirent* dp;
- int ncookies;
- u_long *cookies;
- u_long *cookiep;
-
- if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
- panic("ufs_readdir: unexpected uio from NFS server");
- dpStart = (struct dirent *)
- ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off));
- dpEnd = (struct dirent *) uio->uio_iov->iov_base;
- for (dp = dpStart, ncookies = 0;
- dp < dpEnd;
- dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
- ncookies++;
- cookies = malloc(ncookies * sizeof(u_long), M_TEMP,
- M_WAITOK);
- for (dp = dpStart, cookiep = cookies;
- dp < dpEnd;
- dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
- off += dp->d_reclen;
- *cookiep++ = (u_long) off;
+ bqrelse(bp);
+ uio->uio_offset = offset;
+ }
+ /* We need to correct uio_offset. */
+ uio->uio_offset = offset;
+ if (error == EJUSTRETURN)
+ error = 0;
+ if (ap->a_ncookies != NULL) {
+ if (error == 0) {
+ ap->a_ncookies -= ncookies;
+ } else {
+ free(*ap->a_cookies, M_TEMP);
+ *ap->a_ncookies = 0;
+ *ap->a_cookies = NULL;
}
- *ap->a_ncookies = ncookies;
- *ap->a_cookies = cookies;
}
- uio->uio_resid += lost;
- if (ap->a_eofflag)
- *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
+ if (error == 0 && ap->a_eofflag)
+ *ap->a_eofflag = ip->i_size <= uio->uio_offset;
return (error);
}
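The ufs_readdir() rewrite above replaces the old VOP_READ()-plus-byteswap scheme with direct buffer-cache reads: each on-disk struct direct is bounds-checked, repacked into a struct dirent, copied out with uiomove(), and a cookie recorded per entry. The heart of it is the per-entry validation and conversion, sketched here in user space with simplified stand-in structs (fixed-size names; the kernel sizes the output record with GENERIC_DIRSIZ()):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Simplified stand-ins for the kernel's struct direct / dirent. */
    struct direct_sim {
            uint32_t d_ino;
            uint16_t d_reclen;
            uint8_t  d_type;
            uint8_t  d_namlen;
            char     d_name[64];
    };

    struct dirent_sim {
            uint32_t d_fileno;
            uint8_t  d_type;
            uint8_t  d_namlen;
            char     d_name[64];
    };

    /* Validate and repack one entry; -1 maps to the kernel's EIO. */
    static int
    convert_entry(const struct direct_sim *dp, size_t room,
        struct dirent_sim *out)
    {
            if (dp->d_reclen <= offsetof(struct direct_sim, d_name) ||
                dp->d_reclen > room)
                    return (-1);    /* record overruns the block */
            if (offsetof(struct direct_sim, d_name) + dp->d_namlen >
                dp->d_reclen)
                    return (-1);    /* name overruns the record */
            out->d_fileno = dp->d_ino;
            out->d_type = dp->d_type;
            out->d_namlen = dp->d_namlen;
            memcpy(out->d_name, dp->d_name, dp->d_namlen);
            out->d_name[dp->d_namlen] = '\0';
            return (0);
    }

    int
    main(void)
    {
            struct direct_sim in = { 2, sizeof(in), 4 /* DT_DIR */, 1, "." };
            struct dirent_sim out;

            return (convert_entry(&in, sizeof(in), &out));
    }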
@@ -2589,11 +2585,12 @@
* Vnode dvp must be locked.
*/
static int
-ufs_makeinode(mode, dvp, vpp, cnp)
+ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
int mode;
struct vnode *dvp;
struct vnode **vpp;
struct componentname *cnp;
+ const char *callfunc;
{
struct inode *ip, *pdir;
struct direct newdir;
@@ -2603,15 +2600,16 @@
pdir = VTOI(dvp);
#ifdef INVARIANTS
if ((cnp->cn_flags & HASBUF) == 0)
- panic("ufs_makeinode: no name");
+ panic("%s: no name", callfunc);
#endif
*vpp = NULL;
if ((mode & IFMT) == 0)
mode |= IFREG;
- if (VTOI(dvp)->i_effnlink < 2)
- panic("ufs_makeinode: Bad link count %d on parent",
- VTOI(dvp)->i_effnlink);
+ if (pdir->i_effnlink < 2) {
+ print_bad_link_count(callfunc, dvp);
+ return (EINVAL);
+ }
error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
if (error)
return (error);
Modified: trunk/sys/ufs/ufs/ufsmount.h
===================================================================
--- trunk/sys/ufs/ufs/ufsmount.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufsmount.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -27,11 +28,11 @@
* SUCH DAMAGE.
*
* @(#)ufsmount.h 8.6 (Berkeley) 3/30/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/ufsmount.h 297787 2016-04-10 16:32:21Z kib $
*/
#ifndef _UFS_UFS_UFSMOUNT_H_
-#define _UFS_UFS_UFSMOUNT_H_
+#define _UFS_UFS_UFSMOUNT_H_
#include <sys/buf.h> /* XXX For struct workhead. */
@@ -52,6 +53,7 @@
struct buf;
struct inode;
struct nameidata;
+struct taskqueue;
struct timeval;
struct ucred;
struct uio;
@@ -78,19 +80,7 @@
u_long um_seqinc; /* inc between seq blocks */
struct mtx um_lock; /* Protects ufsmount & fs */
pid_t um_fsckpid; /* PID permitted fsck sysctls */
- long um_numindirdeps; /* outstanding indirdeps */
- struct workhead softdep_workitem_pending; /* softdep work queue */
- struct worklist *softdep_worklist_tail; /* Tail pointer for above */
- struct workhead softdep_journal_pending; /* journal work queue */
- struct worklist *softdep_journal_tail; /* Tail pointer for above */
- struct jblocks *softdep_jblocks; /* Journal block information */
- struct inodedeplst softdep_unlinked; /* Unlinked inodes */
- struct bmsafemaphd softdep_dirtycg; /* Dirty CGs */
- int softdep_on_journal; /* Items on the journal list */
- int softdep_on_worklist; /* Items on the worklist */
- int softdep_deps; /* Total dependency count */
- int softdep_accdeps; /* accumulated dep count */
- int softdep_req; /* Wakeup when deps hits 0. */
+ struct mount_softdeps *um_softdep; /* softdep mgmt structure */
struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
time_t um_btime[MAXQUOTAS]; /* block quota time limit */
@@ -99,11 +89,15 @@
int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */
int um_candelete; /* devvp supports TRIM */
int um_writesuspended; /* suspension in progress */
- int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **);
+ u_int um_trim_inflight;
+ struct taskqueue *um_trim_tq;
+ int (*um_balloc)(struct vnode *, off_t, int, struct ucred *,
+ int, struct buf **);
int (*um_blkatoff)(struct vnode *, off_t, char **, struct buf **);
- int (*um_truncate)(struct vnode *, off_t, int, struct ucred *, struct thread *);
+ int (*um_truncate)(struct vnode *, off_t, int, struct ucred *);
int (*um_update)(struct vnode *, int);
- int (*um_valloc)(struct vnode *, int, struct ucred *, struct vnode **);
+ int (*um_valloc)(struct vnode *, int, struct ucred *,
+ struct vnode **);
int (*um_vfree)(struct vnode *, ino_t, int);
void (*um_ifree)(struct ufsmount *, struct inode *);
int (*um_rdonly)(struct inode *);
@@ -110,13 +104,13 @@
void (*um_snapgone)(struct inode *);
};
-#define UFS_BALLOC(aa, bb, cc, dd, ee, ff) VFSTOUFS((aa)->v_mount)->um_balloc(aa, bb, cc, dd, ee, ff)
-#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
-#define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee)
-#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
-#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
-#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
-#define UFS_IFREE(aa, bb) ((aa)->um_ifree(aa, bb))
+#define UFS_BALLOC(aa, bb, cc, dd, ee, ff) VFSTOUFS((aa)->v_mount)->um_balloc(aa, bb, cc, dd, ee, ff)
+#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
+#define UFS_TRUNCATE(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd)
+#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
+#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
+#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
+#define UFS_IFREE(aa, bb) ((aa)->um_ifree(aa, bb))
#define UFS_RDONLY(aa) ((aa)->i_ump->um_rdonly(aa))
#define UFS_SNAPGONE(aa) ((aa)->i_ump->um_snapgone(aa))
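For orientation, the UFS_* macros being reindented here are thin dispatchers through per-mount function pointers in struct ufsmount; that indirection is exactly why dropping the struct thread * argument from um_truncate surfaces as a UFS_TRUNCATE() signature change at every call site. A reduced sketch of the pattern (simulated names, not the kernel's):

    #include <stdio.h>

    /* Cut-down per-mount ops table in the spirit of struct ufsmount. */
    struct mountops {
            int (*um_truncate)(long long length, int flags);
    };

    static int
    ffs_truncate_sim(long long length, int flags)
    {
            printf("truncate to %lld, flags 0x%x\n", length, flags);
            return (0);
    }

    /* Dispatch macro, as UFS_TRUNCATE() does via VFSTOUFS(). */
    #define UFS_TRUNCATE_SIM(ump, len, fl) ((ump)->um_truncate((len), (fl)))

    int
    main(void)
    {
            struct mountops m = { .um_truncate = ffs_truncate_sim };

            return (UFS_TRUNCATE_SIM(&m, 0, 0));
    }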
@@ -127,8 +121,8 @@
/*
* Filesystem types
*/
-#define UFS1 1
-#define UFS2 2
+#define UFS1 1
+#define UFS2 2
/*
* Flags describing the state of quotas.
@@ -135,10 +129,10 @@
*/
#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */
#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */
-#define QTF_64BIT 0x04 /* 64-bit quota file */
+#define QTF_64BIT 0x04 /* 64-bit quota file */
/* Convert mount ptr to ufsmount ptr. */
-#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
+#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
#define UFSTOVFS(ump) (ump)->um_mountp
/*
@@ -145,7 +139,7 @@
* Macros to access filesystem parameters in the ufsmount structure.
* Used by ufs_bmap.
*/
-#define MNINDIR(ump) ((ump)->um_nindir)
+#define MNINDIR(ump) ((ump)->um_nindir)
#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb)
#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc)
#endif /* _KERNEL */