[Midnightbsd-cvs] src [9897] trunk/sys/ufs/ufs: sync with freebsd 10-stable
laffer1 at midnightbsd.org
Thu May 24 18:29:09 EDT 2018
Revision: 9897
http://svnweb.midnightbsd.org/src/?rev=9897
Author: laffer1
Date: 2018-05-24 18:29:08 -0400 (Thu, 24 May 2018)
Log Message:
-----------
sync with freebsd 10-stable
Modified Paths:
--------------
trunk/sys/ufs/ffs/ffs_alloc.c
trunk/sys/ufs/ffs/ffs_balloc.c
trunk/sys/ufs/ffs/ffs_extern.h
trunk/sys/ufs/ffs/ffs_inode.c
trunk/sys/ufs/ffs/ffs_rawread.c
trunk/sys/ufs/ffs/ffs_snapshot.c
trunk/sys/ufs/ffs/ffs_softdep.c
trunk/sys/ufs/ffs/ffs_subr.c
trunk/sys/ufs/ffs/ffs_suspend.c
trunk/sys/ufs/ffs/ffs_tables.c
trunk/sys/ufs/ffs/ffs_vfsops.c
trunk/sys/ufs/ffs/ffs_vnops.c
trunk/sys/ufs/ffs/fs.h
trunk/sys/ufs/ffs/softdep.h
trunk/sys/ufs/ufs/README.acls
trunk/sys/ufs/ufs/README.extattr
trunk/sys/ufs/ufs/acl.h
trunk/sys/ufs/ufs/dinode.h
trunk/sys/ufs/ufs/dir.h
trunk/sys/ufs/ufs/dirhash.h
trunk/sys/ufs/ufs/extattr.h
trunk/sys/ufs/ufs/gjournal.h
trunk/sys/ufs/ufs/inode.h
trunk/sys/ufs/ufs/quota.h
trunk/sys/ufs/ufs/ufs_acl.c
trunk/sys/ufs/ufs/ufs_bmap.c
trunk/sys/ufs/ufs/ufs_dirhash.c
trunk/sys/ufs/ufs/ufs_extattr.c
trunk/sys/ufs/ufs/ufs_extern.h
trunk/sys/ufs/ufs/ufs_gjournal.c
trunk/sys/ufs/ufs/ufs_inode.c
trunk/sys/ufs/ufs/ufs_lookup.c
trunk/sys/ufs/ufs/ufs_quota.c
trunk/sys/ufs/ufs/ufs_vfsops.c
trunk/sys/ufs/ufs/ufs_vnops.c
trunk/sys/ufs/ufs/ufsmount.h
Property Changed:
----------------
trunk/sys/ufs/ufs/README.acls
trunk/sys/ufs/ufs/README.extattr
Modified: trunk/sys/ufs/ffs/ffs_alloc.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_alloc.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_alloc.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -61,12 +61,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/ufs/ffs/ffs_alloc.c 248667 2013-03-23 22:41:48Z kib $");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_alloc.c 306630 2016-10-03 10:15:16Z kib $");
#include "opt_quota.h"
#include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
@@ -113,8 +113,7 @@
#ifdef INVARIANTS
static int ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
-static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int,
- int);
+static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int);
static ino_t ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t,
int, int);
@@ -255,17 +254,18 @@
struct buf *bp;
struct ufsmount *ump;
u_int cg, request, reclaimed;
- int error;
+ int error, gbflags;
ufs2_daddr_t bno;
static struct timeval lastfail;
static int curfail;
int64_t delta;
- *bpp = 0;
vp = ITOV(ip);
fs = ip->i_fs;
bp = NULL;
ump = ip->i_ump;
+ gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
+
mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
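A note on the gbflags line above, since the same translation recurs throughout this commit: callers request unmapped buffers with the balloc-layer flag BA_UNMAPPED, and the allocation paths convert that into the buffer-cache flag GB_UNMAPPED before calling getblk()/bread_gb(). A minimal sketch of the conversion (the helper name is illustrative; the tree open-codes the expression):

    static int
    balloc_gbflags(int baflags)
    {
            /* GB_UNMAPPED lets the buffer stay unmapped in kernel VA */
            return ((baflags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0);
    }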
@@ -297,7 +297,7 @@
/*
* Allocate the extra space in the buffer.
*/
- error = bread(vp, lbprev, osize, NOCRED, &bp);
+ error = bread_gb(vp, lbprev, osize, NOCRED, gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -319,6 +319,7 @@
/*
* Check for extension in the existing location.
*/
+ *bpp = NULL;
cg = dtog(fs, bprev);
UFS_LOCK(ump);
bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
@@ -333,7 +334,7 @@
ip->i_flag |= IN_CHANGE | IN_UPDATE;
allocbuf(bp, nsize);
bp->b_flags |= B_DONE;
- bzero(bp->b_data + osize, nsize - osize);
+ vfs_bio_bzero_buf(bp, osize, nsize - osize);
if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
vfs_bio_set_valid(bp, osize, nsize - osize);
*bpp = bp;
@@ -401,7 +402,7 @@
ip->i_flag |= IN_CHANGE | IN_UPDATE;
allocbuf(bp, nsize);
bp->b_flags |= B_DONE;
- bzero(bp->b_data + osize, nsize - osize);
+ vfs_bio_bzero_buf(bp, osize, nsize - osize);
if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
vfs_bio_set_valid(bp, osize, nsize - osize);
*bpp = bp;
@@ -459,11 +460,17 @@
SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");
static int doasyncfree = 1;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
+"do not force synchronous writes when blocks are reallocated");
static int doreallocblks = 1;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0,
+"enable block reallocation");
+static int maxclustersearch = 10;
+SYSCTL_INT(_vfs_ffs, OID_AUTO, maxclustersearch, CTLFLAG_RW, &maxclustersearch,
+0, "max number of cylinder group to search for contigous blocks");
+
#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif
@@ -502,7 +509,7 @@
struct inode *ip;
struct vnode *vp;
struct buf *sbp, *ebp;
- ufs1_daddr_t *bap, *sbap, *ebap = 0;
+ ufs1_daddr_t *bap, *sbap, *ebap;
struct cluster_save *buflist;
struct ufsmount *ump;
ufs_lbn_t start_lbn, end_lbn;
@@ -509,13 +516,19 @@
ufs1_daddr_t soff, newblk, blkno;
ufs2_daddr_t pref;
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
- int i, len, start_lvl, end_lvl, ssize;
+ int i, cg, len, start_lvl, end_lvl, ssize;
vp = ap->a_vp;
ip = VTOI(vp);
fs = ip->i_fs;
ump = ip->i_ump;
- if (fs->fs_contigsumsize <= 0)
+ /*
+ * If we are not tracking block clusters or if we have less than 4%
+ * free blocks left, then do not attempt to cluster. Running with
+ * less than 5% free block reserve is not recommended and those that
+ * choose to do so do not expect to have good file layout.
+ */
+ if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0)
return (ENOSPC);
buflist = ap->a_buflist;
len = buflist->bs_nchildren;
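The freespace(fs, 4) test added above compares the superblock's free-block totals against a percentage reserve; a negative result means the filesystem is below a 4% reserve and clustering is skipped with ENOSPC. A simplified sketch of the computation, with illustrative names (the authoritative macro lives in sys/ufs/ffs/fs.h):

    /* Fragments still free once a pct% reserve of the data area is
     * held back; a negative result means we are below the reserve. */
    static long
    frags_above_reserve(long free_frags, long data_frags, int pct)
    {
            return (free_frags - data_frags * pct / 100);
    }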
@@ -576,6 +589,7 @@
/*
* If the block range spans two block maps, get the second map.
*/
+ ebap = NULL;
if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
ssize = len;
} else {
@@ -590,18 +604,39 @@
ebap = (ufs1_daddr_t *)ebp->b_data;
}
/*
- * Find the preferred location for the cluster.
+ * Find the preferred location for the cluster. If we have not
+ * previously failed at this endeavor, then follow our standard
+ * preference calculation. If we have failed at it, then pick up
+ * where we last ended our search.
*/
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
+ if (ip->i_nextclustercg == -1)
+ pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
+ else
+ pref = cgdata(fs, ip->i_nextclustercg);
/*
* Search the block map looking for an allocation of the desired size.
+ * To avoid wasting too much time, we limit the number of cylinder
+ * groups that we will search.
*/
- if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
- len, len, ffs_clusteralloc)) == 0) {
+ cg = dtog(fs, pref);
+ for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
+ if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
+ break;
+ cg += 1;
+ if (cg >= fs->fs_ncg)
+ cg = 0;
+ }
+ /*
+ * If we have failed in our search, record where we gave up for
+ * next time. Otherwise, fall back to our usual search criterion.
+ */
+ if (newblk == 0) {
+ ip->i_nextclustercg = cg;
UFS_UNLOCK(ump);
goto fail;
}
+ ip->i_nextclustercg = -1;
/*
* We have found a new contiguous block.
*
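The search loop added in this hunk replaces the old one-shot ffs_hashalloc() call with a bounded scan that wraps around the last cylinder group, and i_nextclustercg persists the give-up point across calls. A self-contained sketch of the pattern (try_alloc stands in for ffs_clusteralloc()):

    typedef long (*cg_try_fn)(int cg, int len);

    static long
    bounded_cg_search(cg_try_fn try_alloc, int start_cg, int ncg,
        int limit, int len, int *resume_cg)
    {
            long blkno;
            int cg, i;

            cg = start_cg;
            for (i = (limit < ncg ? limit : ncg); i > 0; i--) {
                    if ((blkno = try_alloc(cg, len)) != 0)
                            return (blkno); /* len contiguous blocks */
                    if (++cg >= ncg)
                            cg = 0;         /* wrap past the last group */
            }
            *resume_cg = cg;                /* remember where we gave up */
            return (0);
    }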
@@ -611,7 +646,8 @@
*/
#ifdef DEBUG
if (prtrealloc)
- printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
+ printf("realloc: ino %ju, lbns %jd-%jd\n\told:",
+ (uintmax_t)ip->i_number,
(intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
blkno = newblk;
@@ -723,19 +759,25 @@
struct inode *ip;
struct vnode *vp;
struct buf *sbp, *ebp;
- ufs2_daddr_t *bap, *sbap, *ebap = 0;
+ ufs2_daddr_t *bap, *sbap, *ebap;
struct cluster_save *buflist;
struct ufsmount *ump;
ufs_lbn_t start_lbn, end_lbn;
ufs2_daddr_t soff, newblk, blkno, pref;
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
- int i, len, start_lvl, end_lvl, ssize;
+ int i, cg, len, start_lvl, end_lvl, ssize;
vp = ap->a_vp;
ip = VTOI(vp);
fs = ip->i_fs;
ump = ip->i_ump;
- if (fs->fs_contigsumsize <= 0)
+ /*
+ * If we are not tracking block clusters or if we have less than 4%
+ * free blocks left, then do not attempt to cluster. Running with
+ * less than 5% free block reserve is not recommended and those that
+ * choose to do so do not expect to have good file layout.
+ */
+ if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0)
return (ENOSPC);
buflist = ap->a_buflist;
len = buflist->bs_nchildren;
@@ -796,6 +838,7 @@
/*
* If the block range spans two block maps, get the second map.
*/
+ ebap = NULL;
if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
ssize = len;
} else {
@@ -810,18 +853,39 @@
ebap = (ufs2_daddr_t *)ebp->b_data;
}
/*
- * Find the preferred location for the cluster.
+ * Find the preferred location for the cluster. If we have not
+ * previously failed at this endeavor, then follow our standard
+ * preference calculation. If we have failed at it, then pick up
+ * where we last ended our search.
*/
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
+ if (ip->i_nextclustercg == -1)
+ pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
+ else
+ pref = cgdata(fs, ip->i_nextclustercg);
/*
* Search the block map looking for an allocation of the desired size.
+ * To avoid wasting too much time, we limit the number of cylinder
+ * groups that we will search.
*/
- if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
- len, len, ffs_clusteralloc)) == 0) {
+ cg = dtog(fs, pref);
+ for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
+ if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
+ break;
+ cg += 1;
+ if (cg >= fs->fs_ncg)
+ cg = 0;
+ }
+ /*
+ * If we have failed in our search, record where we gave up for
+ * next time. Otherwise, fall back to our usual search criterion.
+ */
+ if (newblk == 0) {
+ ip->i_nextclustercg = cg;
UFS_UNLOCK(ump);
goto fail;
}
+ ip->i_nextclustercg = -1;
/*
* We have found a new contiguous block.
*
@@ -1168,14 +1232,14 @@
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
fs->fs_cs(fs, cg).cs_nifree >= minifree &&
- fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
if (fs->fs_contigdirs[cg] < maxcontigdirs)
return ((ino_t)(fs->fs_ipg * cg));
}
- for (cg = prefcg - 1; cg >= 0; cg--)
+ for (cg = 0; cg < prefcg; cg++)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
fs->fs_cs(fs, cg).cs_nifree >= minifree &&
- fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
if (fs->fs_contigdirs[cg] < maxcontigdirs)
return ((ino_t)(fs->fs_ipg * cg));
}
@@ -1185,7 +1249,7 @@
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
return ((ino_t)(fs->fs_ipg * cg));
- for (cg = prefcg - 1; cg >= 0; cg--)
+ for (cg = 0; cg < prefcg; cg++)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
break;
return ((ino_t)(fs->fs_ipg * cg));
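The two hunks above change ffs_dirpref()'s fallback scans from walking backward (prefcg - 1 down to 0) to walking forward from group 0, so both passes now sweep in the same direction. The revised two-pass shape, with ok() as a hypothetical stand-in for the cs_nifree/cs_nbfree tests:

    static int
    two_pass_scan(int prefcg, int ncg, int (*ok)(int))
    {
            int cg;

            for (cg = prefcg; cg < ncg; cg++)       /* prefcg .. ncg-1 */
                    if (ok(cg))
                            return (cg);
            for (cg = 0; cg < prefcg; cg++)         /* 0 .. prefcg-1 */
                    if (ok(cg))
                            return (cg);
            return (-1);
    }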
@@ -1194,7 +1258,8 @@
/*
* Select the desired position for the next block in a file. The file is
* logically divided into sections. The first section is composed of the
- * direct blocks. Each additional section contains fs_maxbpg blocks.
+ * direct blocks and the next fs_maxbpg blocks. Each additional section
+ * contains fs_maxbpg blocks.
*
* If no blocks have been allocated in the first section, the policy is to
* request a block in the same cylinder group as the inode that describes
@@ -1212,14 +1277,12 @@
* cylinder group from which the previous allocation was made. The sweep
* continues until a cylinder group with greater than the average number
* of free blocks is found. If the allocation is for the first block in an
- * indirect block, the information on the previous allocation is unavailable;
- * here a best guess is made based upon the logical block number being
- * allocated.
+ * indirect block or the previous block is a hole, then the information on
+ * the previous allocation is unavailable; here a best guess is made based
+ * on the logical block number being allocated.
*
* If a section is already partially allocated, the policy is to
- * contiguously allocate fs_maxcontig blocks. The end of one of these
- * contiguous blocks and the beginning of the next is laid out
- * contiguously if possible.
+ * allocate blocks contiguously within the section if possible.
*/
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
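Under the reworded comment, section 0 now spans the direct blocks plus the first fs_maxbpg blocks, and every later section holds fs_maxbpg blocks. A worked sketch of that division (illustrative arithmetic encoding the comment, not code from the tree):

    /* Section 0: lbn < NDADDR + maxbpg; later sections hold maxbpg. */
    static long
    file_section(long lbn, long nddir /* NDADDR */, long maxbpg)
    {
            if (lbn < nddir + maxbpg)
                    return (0);
            return (1 + (lbn - nddir - maxbpg) / maxbpg);
    }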
@@ -1708,7 +1771,7 @@
cgp = (struct cg *)bp->b_data;
blksfree = cg_blksfree(cgp);
if (bpref == 0) {
- bpref = cgp->cg_rotor;
+ bpref = cgbase(fs, cgp->cg_cgx) + cgp->cg_rotor + fs->fs_frag;
} else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) {
/* map bpref to correct zone in this cg */
if (bpref < cgdata(fs, cgbpref))
@@ -1772,12 +1835,11 @@
* take the first one that we find following bpref.
*/
static ufs2_daddr_t
-ffs_clusteralloc(ip, cg, bpref, len, unused)
+ffs_clusteralloc(ip, cg, bpref, len)
struct inode *ip;
u_int cg;
ufs2_daddr_t bpref;
int len;
- int unused;
{
struct fs *fs;
struct cg *cgp;
@@ -1920,9 +1982,9 @@
struct cg *cgp;
struct buf *bp, *ibp;
struct ufsmount *ump;
- u_int8_t *inosused;
+ u_int8_t *inosused, *loc;
struct ufs2_dinode *dp2;
- int error, start, len, loc, map, i;
+ int error, start, len, i;
u_int32_t old_initediblk;
fs = ip->i_fs;
@@ -1954,12 +2016,12 @@
}
start = cgp->cg_irotor / NBBY;
len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
- loc = skpc(0xff, len, &inosused[start]);
- if (loc == 0) {
+ loc = memcchr(&inosused[start], 0xff, len);
+ if (loc == NULL) {
len = start + 1;
start = 0;
- loc = skpc(0xff, len, &inosused[0]);
- if (loc == 0) {
+ loc = memcchr(&inosused[start], 0xff, len);
+ if (loc == NULL) {
printf("cg = %d, irotor = %ld, fs = %s\n",
cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
panic("ffs_nodealloccg: map corrupted");
@@ -1966,13 +2028,7 @@
/* NOTREACHED */
}
}
- i = start + len - loc;
- map = inosused[i] ^ 0xff;
- if (map == 0) {
- printf("fs = %s\n", fs->fs_fsmnt);
- panic("ffs_nodealloccg: block not in map");
- }
- ipref = i * NBBY + ffs(map) - 1;
+ ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1;
gotit:
/*
* Check to see if we need to initialize more inodes.
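The memcchr() conversion above drops the old skpc() scan and its manual byte recheck: memcchr(p, 0xff, len) returns a pointer to the first byte that is not 0xff (or NULL if all bytes match), and ffs(~*loc) then finds the first clear bit inside that byte. A self-contained sketch (memcchr() is a FreeBSD routine, not standard C):

    #include <string.h>     /* memcchr() on FreeBSD */
    #include <strings.h>    /* ffs() */

    #define NBBY    8       /* bits per byte */

    static int
    first_free_inode(unsigned char *inosused, int len)
    {
            unsigned char *loc;

            loc = memcchr(inosused, 0xff, len);
            if (loc == NULL)
                    return (-1);    /* every inode in the range is used */
            return ((loc - inosused) * NBBY + ffs(~*loc) - 1);
    }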
@@ -2101,12 +2157,13 @@
/* devvp is a snapshot */
dev = VTOI(devvp)->i_devvp->v_rdev;
cgblkno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
dev = devvp->v_rdev;
cgblkno = fsbtodb(fs, cgtod(fs, cg));
ASSERT_VOP_LOCKED(devvp, "ffs_blkfree_cg");
- }
+ } else
+ return;
#ifdef INVARIANTS
if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
@@ -2200,14 +2257,12 @@
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
mp = UFSTOVFS(ump);
- if (MOUNTEDSOFTDEP(mp) && devvp->v_type != VREG)
+ if (MOUNTEDSOFTDEP(mp) && devvp->v_type == VCHR)
softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
numfrags(fs, size), dephd);
bdwrite(bp);
}
-TASKQUEUE_DEFINE_THREAD(ffs_trim);
-
struct ffs_blkfree_trim_params {
struct task task;
struct ufsmount *ump;
@@ -2230,6 +2285,7 @@
ffs_blkfree_cg(tp->ump, tp->ump->um_fs, tp->devvp, tp->bno, tp->size,
tp->inum, tp->pdephd);
vn_finished_secondary_write(UFSTOVFS(tp->ump));
+ atomic_add_int(&tp->ump->um_trim_inflight, -1);
free(tp, M_TEMP);
}
@@ -2242,7 +2298,7 @@
tp = bip->bio_caller2;
g_destroy_bio(bip);
TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp);
- taskqueue_enqueue(taskqueue_ffs_trim, &tp->task);
+ taskqueue_enqueue(tp->ump->um_trim_tq, &tp->task);
}
void
@@ -2266,7 +2322,7 @@
* it has a snapshot(s) associated with it, and one of the
* snapshots wants to claim the block.
*/
- if (devvp->v_type != VREG &&
+ if (devvp->v_type == VCHR &&
(devvp->v_vflag & VV_COPYONWRITE) &&
ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd)) {
return;
@@ -2286,6 +2342,7 @@
* reordering, TRIM might be issued after we reuse the block
* and write some new data into it.
*/
+ atomic_add_int(&ump->um_trim_inflight, 1);
tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK);
tp->ump = ump;
tp->devvp = devvp;
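The um_trim_inflight increment here pairs with the decrement added earlier in ffs_blkfree_trim_task(), so the mount can account for TRIM work still in flight (and, with the per-mount um_trim_tq, wait it out). A userland analogue of the accounting pattern, using C11 atomics in place of the kernel's atomic_add_int():

    #include <stdatomic.h>

    static atomic_int trim_inflight;

    static void
    trim_enqueue(void)
    {
            atomic_fetch_add(&trim_inflight, 1);    /* before enqueue */
            /* ... hand the request to the worker thread ... */
    }

    static void
    trim_task_done(void)
    {
            /* ... the free has been processed; release the slot ... */
            atomic_fetch_sub(&trim_inflight, 1);
    }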
@@ -2408,14 +2465,17 @@
/* devvp is a snapshot */
dev = VTOI(devvp)->i_devvp->v_rdev;
cgbno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
dev = devvp->v_rdev;
cgbno = fsbtodb(fs, cgtod(fs, cg));
+ } else {
+ bp = NULL;
+ return (0);
}
if (ino >= fs->fs_ipg * fs->fs_ncg)
- panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s",
- devtoname(dev), (u_long)ino, fs->fs_fsmnt);
+ panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s",
+ devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt);
if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
brelse(bp);
return (error);
@@ -2430,8 +2490,8 @@
inosused = cg_inosused(cgp);
ino %= fs->fs_ipg;
if (isclr(inosused, ino)) {
- printf("dev = %s, ino = %u, fs = %s\n", devtoname(dev),
- ino + cg * fs->fs_ipg, fs->fs_fsmnt);
+ printf("dev = %s, ino = %ju, fs = %s\n", devtoname(dev),
+ (uintmax_t)(ino + cg * fs->fs_ipg), fs->fs_fsmnt);
if (fs->fs_ronly == 0)
panic("ffs_freefile: freeing free inode");
}
@@ -2450,7 +2510,7 @@
fs->fs_fmod = 1;
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
- if (MOUNTEDSOFTDEP(UFSTOVFS(ump)) && devvp->v_type != VREG)
+ if (MOUNTEDSOFTDEP(UFSTOVFS(ump)) && devvp->v_type == VCHR)
softdep_setup_inofree(UFSTOVFS(ump), bp,
ino + cg * fs->fs_ipg, wkhd);
bdwrite(bp);
@@ -2477,9 +2537,11 @@
if (devvp->v_type == VREG) {
/* devvp is a snapshot */
cgbno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
cgbno = fsbtodb(fs, cgtod(fs, cg));
+ } else {
+ return (1);
}
if (ino >= fs->fs_ipg * fs->fs_ncg)
return (1);
@@ -2581,8 +2643,9 @@
struct thread *td = curthread; /* XXX */
struct proc *p = td->td_proc;
- log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n",
- p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp);
+ log(LOG_ERR, "pid %d (%s), uid %d inumber %ju on %s: %s\n",
+ p->p_pid, p->p_comm, td->td_ucred->cr_uid, (uintmax_t)inum,
+ fs->fs_fsmnt, cp);
}
/*
@@ -2701,7 +2764,8 @@
long blkcnt, blksize;
struct filedesc *fdp;
struct file *fp, *vfp;
- int vfslocked, filetype, error;
+ cap_rights_t rights;
+ int filetype, error;
static struct fileops *origops, bufferedops;
if (req->newlen > sizeof cmd)
@@ -2710,8 +2774,8 @@
return (error);
if (cmd.version != FFS_CMD_VERSION)
return (ERPCMISMATCH);
- if ((error = getvnode(td->td_proc->p_fd, cmd.handle, CAP_FSCK,
- &fp)) != 0)
+ if ((error = getvnode(td->td_proc->p_fd, cmd.handle,
+ cap_rights_init(&rights, CAP_FSCK), &fp)) != 0)
return (error);
vp = fp->f_data;
if (vp->v_type != VREG && vp->v_type != VDIR) {
@@ -2719,7 +2783,8 @@
return (EINVAL);
}
vn_start_write(vp, &mp, V_WAIT);
- if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
+ if (mp == NULL ||
+ strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
vn_finished_write(mp);
fdrop(fp, td);
return (EINVAL);
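The getvnode() calls in this file move from a bare CAP_FSCK constant to the capsicum cap_rights_t API. cap_rights_init() is variadic and returns its first argument, which is why it can be passed inline; a sketch of the same handoff (the wrapper name is illustrative):

    static int
    fsck_handle_to_file(struct thread *td, int fd, struct file **fpp)
    {
            cap_rights_t rights;

            return (getvnode(td->td_proc->p_fd, fd,
                cap_rights_init(&rights, CAP_FSCK), fpp));
    }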
@@ -2794,16 +2859,16 @@
#ifdef DEBUG
if (fsckcmds) {
if (cmd.size == 1)
- printf("%s: free %s inode %d\n",
+ printf("%s: free %s inode %ju\n",
mp->mnt_stat.f_mntonname,
filetype == IFDIR ? "directory" : "file",
- (ino_t)cmd.value);
+ (uintmax_t)cmd.value);
else
- printf("%s: free %s inodes %d-%d\n",
+ printf("%s: free %s inodes %ju-%ju\n",
mp->mnt_stat.f_mntonname,
filetype == IFDIR ? "directory" : "file",
- (ino_t)cmd.value,
- (ino_t)(cmd.value + cmd.size - 1));
+ (uintmax_t)cmd.value,
+ (uintmax_t)(cmd.value + cmd.size - 1));
}
#endif /* DEBUG */
while (cmd.size > 0) {
@@ -2906,23 +2971,18 @@
#endif /* DEBUG */
if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_SHARED, &vp)))
break;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
AUDIT_ARG_VNODE1(vp);
if ((error = change_dir(vp, td)) != 0) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
break;
}
VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
fdp = td->td_proc->p_fd;
FILEDESC_XLOCK(fdp);
vpold = fdp->fd_cdir;
fdp->fd_cdir = vp;
FILEDESC_XUNLOCK(fdp);
- vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
vrele(vpold);
- VFS_UNLOCK_GIANT(vfslocked);
break;
case FFS_SET_DOTDOT:
@@ -2995,7 +3055,6 @@
#endif /* DEBUG */
if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
break;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
AUDIT_ARG_VNODE1(vp);
ip = VTOI(vp);
if (ip->i_ump->um_fstype == UFS1)
@@ -3006,13 +3065,11 @@
sizeof(struct ufs2_dinode));
if (error) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
break;
}
ip->i_flag |= IN_CHANGE | IN_MODIFIED;
error = ffs_update(vp, 1);
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
break;
case FFS_SET_BUFOUTPUT:
@@ -3033,7 +3090,7 @@
}
#endif /* DEBUG */
if ((error = getvnode(td->td_proc->p_fd, cmd.value,
- CAP_FSCK, &vfp)) != 0)
+ cap_rights_init(&rights, CAP_FSCK), &vfp)) != 0)
break;
if (vfp->f_vnode->v_type != VCHR) {
fdrop(vfp, td);
@@ -3090,7 +3147,7 @@
struct buf *bp;
struct fs *fs;
struct filedesc *fdp;
- int error, vfslocked;
+ int error;
daddr_t lbn;
/*
@@ -3107,7 +3164,6 @@
vp = fdp->fd_cdir;
vref(vp);
FILEDESC_SUNLOCK(fdp);
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_lock(vp, LK_SHARED | LK_RETRY);
/*
* Check that the current directory vnode indeed belongs to
@@ -3115,18 +3171,15 @@
*/
if (vp->v_op != &ffs_vnodeops1 && vp->v_op != &ffs_vnodeops2) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
return (EINVAL);
}
ip = VTOI(vp);
if (ip->i_devvp != devvp) {
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
return (EINVAL);
}
fs = ip->i_fs;
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
foffset_lock_uio(fp, uio, flags);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
#ifdef DEBUG
Modified: trunk/sys/ufs/ffs/ffs_balloc.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_balloc.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_balloc.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Networks Associates Technology, Inc.
* All rights reserved.
@@ -60,7 +61,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_balloc.c 304672 2016-08-23 07:55:32Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -107,7 +108,7 @@
int saved_inbdflush;
static struct timeval lastfail;
static int curfail;
- int reclaimed;
+ int gbflags, reclaimed;
ip = VTOI(vp);
dp = ip->i_din1;
@@ -123,6 +124,7 @@
return (EOPNOTSUPP);
if (lbn < 0)
return (EFBIG);
+ gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
if (DOINGSOFTDEP(vp))
softdep_prealloc(vp, MNT_WAIT);
@@ -211,7 +213,7 @@
nsize, flags, cred, &newb);
if (error)
return (error);
- bp = getblk(vp, lbn, nsize, 0, 0, 0);
+ bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, newb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(bp);
@@ -247,7 +249,7 @@
UFS_LOCK(ump);
pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
(ufs1_daddr_t *)0);
- if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+ if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) {
curthread_pflags_restore(saved_inbdflush);
return (error);
@@ -254,9 +256,11 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn;
- bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
+ bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, nb);
vfs_bio_clrbuf(bp);
if (DOINGSOFTDEP(vp)) {
@@ -298,6 +302,10 @@
continue;
}
UFS_LOCK(ump);
+ /*
+ * If parent indirect has just been allocated, try to cluster
+ * immediately following it.
+ */
if (pref == 0)
pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
(ufs1_daddr_t *)0);
@@ -304,7 +312,7 @@
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -320,6 +328,8 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[i].in_lbn;
nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
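The MPASS() assertions added before each *allocblk++/*lbns_remfree++ store guard the allociblk[] and lbns[] cursors against overrun when INVARIANTS is enabled. A userland sketch of the same bounds check, with assert() standing in for MPASS():

    #include <assert.h>
    #include <stddef.h>

    #define nitems(x)       (sizeof(x) / sizeof((x)[0]))

    static void
    append_block(long **cursor, long *base, size_t cap, long nb)
    {
            assert(*cursor < base + cap);   /* MPASS() in the kernel */
            *(*cursor)++ = nb;
    }

    /* e.g. append_block(&allocblk, allociblk, nitems(allociblk), nb); */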
@@ -367,7 +377,14 @@
*/
if (nb == 0) {
UFS_LOCK(ump);
- if (pref == 0)
+ /*
+ * If allocating metadata at the front of the cylinder
+ * group and parent indirect block has just been allocated,
+ * then cluster next to it if it is the first indirect in
+ * the file. Otherwise it has been allocated in the metadata
+ * area, so we want to find our own place out in the data area.
+ */
+ if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
&bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
@@ -374,7 +391,7 @@
flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -389,9 +406,11 @@
goto fail;
}
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = lbn;
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(nbp);
@@ -417,12 +436,15 @@
brelse(bp);
if (flags & BA_CLRBUF) {
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
- if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+ if (seqcount != 0 &&
+ (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
+ !(vm_page_count_severe() || buf_dirty_count_severe())) {
error = cluster_read(vp, ip->i_size, lbn,
(int)fs->fs_bsize, NOCRED,
- MAXBSIZE, seqcount, &nbp);
+ MAXBSIZE, seqcount, gbflags, &nbp);
} else {
- error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
+ gbflags, &nbp);
}
if (error) {
brelse(nbp);
@@ -429,7 +451,7 @@
goto fail;
}
} else {
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
}
curthread_pflags_restore(saved_inbdflush);
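The BA_CLRBUF read path above now also backs off from clustered read-ahead when the VM or buffer subsystem is under severe pressure, not just when clustering is disabled or the access is non-sequential. A sketch of the decision (the *_severe parameters stand in for vm_page_count_severe() and buf_dirty_count_severe()):

    enum read_path { READ_CLUSTER, READ_PLAIN };

    static enum read_path
    pick_read_path(int seqcount, int nocluster, int vm_severe,
        int dirty_severe)
    {
            if (seqcount != 0 && !nocluster &&
                !(vm_severe || dirty_severe))
                    return (READ_CLUSTER);  /* cluster_read(...) */
            return (READ_PLAIN);            /* bread_gb(...) */
    }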
@@ -451,7 +473,7 @@
* We have to sync it at the end so that the soft updates code
* does not find any untracked changes. Although this is really
* slow, running out of disk space is not expected to be a common
- * occurence. The error return from fsync is ignored as we already
+ * occurrence. The error return from fsync is ignored as we already
* have an error to return to the user.
*
* XXX Still have to journal the free below
@@ -463,10 +485,16 @@
* We shall not leave the freed blocks on the vnode
* buffer object lists.
*/
- bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
if (bp != NULL) {
- bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
+ KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+ ("mismatch1 l %jd %jd b %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
+ (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp)));
+ bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~(B_ASYNC | B_CACHE);
brelse(bp);
}
deallocated += fs->fs_bsize;
@@ -509,6 +537,18 @@
* cleared, free the blocks.
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+ if (blkp == allociblk)
+ lbns_remfree = lbns;
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
+ if (bp != NULL) {
+ panic("zombie1 %jd %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp));
+ }
+ lbns_remfree++;
+#endif
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
}
@@ -541,7 +581,7 @@
int saved_inbdflush;
static struct timeval lastfail;
static int curfail;
- int reclaimed;
+ int gbflags, reclaimed;
ip = VTOI(vp);
dp = ip->i_din2;
@@ -555,6 +595,7 @@
*bpp = NULL;
if (lbn < 0)
return (EFBIG);
+ gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
if (DOINGSOFTDEP(vp))
softdep_prealloc(vp, MNT_WAIT);
@@ -605,7 +646,8 @@
panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
nb = dp->di_extb[lbn];
if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
- error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
+ error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -622,7 +664,8 @@
osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
nsize = fragroundup(fs, size);
if (nsize <= osize) {
- error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
+ error = bread_gb(vp, -1 - lbn, osize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -655,7 +698,7 @@
nsize, flags, cred, &newb);
if (error)
return (error);
- bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
+ bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, newb);
bp->b_xflags |= BX_ALTDATA;
if (flags & BA_CLRBUF)
@@ -709,7 +752,8 @@
panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
nb = dp->di_db[lbn];
if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
- error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
+ error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -725,7 +769,8 @@
osize = fragroundup(fs, blkoff(fs, ip->i_size));
nsize = fragroundup(fs, size);
if (nsize <= osize) {
- error = bread(vp, lbn, osize, NOCRED, &bp);
+ error = bread_gb(vp, lbn, osize, NOCRED,
+ gbflags, &bp);
if (error) {
brelse(bp);
return (error);
@@ -755,7 +800,7 @@
&dp->di_db[0]), nsize, flags, cred, &newb);
if (error)
return (error);
- bp = getblk(vp, lbn, nsize, 0, 0, 0);
+ bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
bp->b_blkno = fsbtodb(fs, newb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(bp);
@@ -791,7 +836,7 @@
UFS_LOCK(ump);
pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
(ufs2_daddr_t *)0);
- if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+ if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) {
curthread_pflags_restore(saved_inbdflush);
return (error);
@@ -798,9 +843,12 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn;
- bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
+ bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
+ GB_UNMAPPED);
bp->b_blkno = fsbtodb(fs, nb);
vfs_bio_clrbuf(bp);
if (DOINGSOFTDEP(vp)) {
@@ -842,6 +890,10 @@
continue;
}
UFS_LOCK(ump);
+ /*
+ * If parent indirect has just been allocated, try to cluster
+ * immediately following it.
+ */
if (pref == 0)
pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
(ufs2_daddr_t *)0);
@@ -848,7 +900,7 @@
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -864,9 +916,12 @@
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[i].in_lbn;
- nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
+ GB_UNMAPPED);
nbp->b_blkno = fsbtodb(fs, nb);
vfs_bio_clrbuf(nbp);
if (DOINGSOFTDEP(vp)) {
@@ -911,7 +966,14 @@
*/
if (nb == 0) {
UFS_LOCK(ump);
- if (pref == 0)
+ /*
+ * If allocating metadata at the front of the cylinder
+ * group and parent indirect block has just been allocated,
+ * then cluster next to it if it is the first indirect in
+ * the file. Otherwise it has been allocated in the metadata
+ * area, so we want to find our own place out in the data area.
+ */
+ if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
&bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
@@ -918,7 +980,7 @@
flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -933,9 +995,11 @@
goto fail;
}
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = lbn;
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
if (flags & BA_CLRBUF)
vfs_bio_clrbuf(nbp);
@@ -967,12 +1031,15 @@
*/
if (flags & BA_CLRBUF) {
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
- if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+ if (seqcount != 0 &&
+ (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
+ !(vm_page_count_severe() || buf_dirty_count_severe())) {
error = cluster_read(vp, ip->i_size, lbn,
(int)fs->fs_bsize, NOCRED,
- MAXBSIZE, seqcount, &nbp);
+ MAXBSIZE, seqcount, gbflags, &nbp);
} else {
- error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ error = bread_gb(vp, lbn, (int)fs->fs_bsize,
+ NOCRED, gbflags, &nbp);
}
if (error) {
brelse(nbp);
@@ -979,7 +1046,7 @@
goto fail;
}
} else {
- nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
+ nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
nbp->b_blkno = fsbtodb(fs, nb);
}
curthread_pflags_restore(saved_inbdflush);
@@ -1001,7 +1068,7 @@
* We have to sync it at the end so that the soft updates code
* does not find any untracked changes. Although this is really
* slow, running out of disk space is not expected to be a common
- * occurence. The error return from fsync is ignored as we already
+ * occurrence. The error return from fsync is ignored as we already
* have an error to return to the user.
*
* XXX Still have to journal the free below
@@ -1013,10 +1080,16 @@
* We shall not leave the freed blocks on the vnode
* buffer object lists.
*/
- bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
if (bp != NULL) {
- bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
+ KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+ ("mismatch2 l %jd %jd b %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
+ (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp)));
+ bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~(B_ASYNC | B_CACHE);
brelse(bp);
}
deallocated += fs->fs_bsize;
@@ -1059,6 +1132,18 @@
* cleared, free the blocks.
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+ if (blkp == allociblk)
+ lbns_remfree = lbns;
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
+ if (bp != NULL) {
+ panic("zombie2 %jd %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp));
+ }
+ lbns_remfree++;
+#endif
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
}
Modified: trunk/sys/ufs/ffs/ffs_extern.h
===================================================================
--- trunk/sys/ufs/ffs/ffs_extern.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_extern.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -27,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/ffs_extern.h 306175 2016-09-22 10:42:40Z kib $
*/
#ifndef _UFS_FFS_EXTERN_H
@@ -95,7 +96,7 @@
void process_deferred_inactive(struct mount *mp);
void ffs_sync_snap(struct mount *, int);
int ffs_syncvnode(struct vnode *vp, int waitfor, int flags);
-int ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
+int ffs_truncate(struct vnode *, off_t, int, struct ucred *);
int ffs_update(struct vnode *, int);
int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **);
@@ -107,6 +108,9 @@
#define FFSV_FORCEINSMQ 0x0001
+#define FFSR_FORCE 0x0001
+#define FFSR_UNSUSPEND 0x0002
+
extern struct vop_vector ffs_vnodeops1;
extern struct vop_vector ffs_fifoops1;
extern struct vop_vector ffs_vnodeops2;
@@ -152,9 +156,7 @@
void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vnode *);
int softdep_sync_buf(struct vnode *, struct buf *, int);
-int softdep_process_worklist(struct mount *, int);
int softdep_fsync(struct vnode *);
-int softdep_waitidle(struct mount *);
int softdep_prealloc(struct vnode *, int);
int softdep_journal_lookup(struct mount *, struct vnode **);
void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
@@ -167,10 +169,10 @@
/*
* Things to request flushing in softdep_request_cleanup()
*/
-#define FLUSH_INODES 1
-#define FLUSH_INODES_WAIT 2
-#define FLUSH_BLOCKS 3
-#define FLUSH_BLOCKS_WAIT 4
+#define FLUSH_INODES 1
+#define FLUSH_INODES_WAIT 2
+#define FLUSH_BLOCKS 3
+#define FLUSH_BLOCKS_WAIT 4
/*
* Flag to ffs_syncvnode() to request flushing of data only,
* but skip the ffs_update() on the inode itself. Used to avoid
Modified: trunk/sys/ufs/ffs/ffs_inode.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_inode.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_inode.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_inode.c 300600 2016-05-24 10:41:34Z kib $");
#include "opt_quota.h"
@@ -43,6 +44,7 @@
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
+#include <sys/rwlock.h>
#include <sys/vmmeter.h>
#include <sys/stat.h>
@@ -170,12 +172,11 @@
* disk blocks.
*/
int
-ffs_truncate(vp, length, flags, cred, td)
+ffs_truncate(vp, length, flags, cred)
struct vnode *vp;
off_t length;
int flags;
struct ucred *cred;
- struct thread *td;
{
struct inode *ip;
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
@@ -449,7 +450,7 @@
ip->i_size = osize;
DIP_SET(ip, i_size, osize);
- error = vtruncbuf(vp, cred, td, length, fs->fs_bsize);
+ error = vtruncbuf(vp, cred, length, fs->fs_bsize);
if (error && (allerror == 0))
allerror = error;
@@ -562,7 +563,7 @@
softdep_journal_freeblocks(ip, cred, length, IO_EXT);
else
softdep_setup_freeblocks(ip, length, IO_EXT);
- return (ffs_update(vp, !DOINGASYNC(vp)));
+ return (ffs_update(vp, (flags & IO_SYNC) != 0 || !DOINGASYNC(vp)));
}
/*
Modified: trunk/sys/ufs/ffs/ffs_rawread.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_rawread.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_rawread.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2000-2003 Tor Egge
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_rawread.c 318267 2017-05-14 12:00:00Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -42,6 +43,7 @@
#include <sys/ttycom.h>
#include <sys/bio.h>
#include <sys/buf.h>
+#include <sys/rwlock.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@@ -70,8 +72,6 @@
int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
-void ffs_rawread_setup(void);
-
SYSCTL_DECL(_vfs_ffs);
static int ffsrawbufcnt = 4;
@@ -86,14 +86,14 @@
SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
"Flag to enable readahead for long raw reads");
+static void
+ffs_rawread_setup(void *arg __unused)
+{
-void
-ffs_rawread_setup(void)
-{
ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
}
+SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL);
-
static int
ffs_rawread_sync(struct vnode *vp)
{
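ffs_rawread_setup() above stops being a public symbol (its extern prototype is removed) and becomes a static function registered with SYSINIT, so it runs automatically during boot. The general registration shape, with illustrative names:

    static void
    example_setup(void *arg __unused)
    {
            /* one-time boot tuning, e.g. sizing a pool from nswbuf */
    }
    SYSINIT(example_setup_init, SI_SUB_VM_CONF, SI_ORDER_ANY,
        example_setup, NULL);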
@@ -143,9 +143,9 @@
if ((obj = vp->v_object) != NULL &&
(obj->flags & OBJ_MIGHTBEDIRTY) != 0) {
VI_UNLOCK(vp);
- VM_OBJECT_LOCK(obj);
+ VM_OBJECT_WLOCK(obj);
vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
- VM_OBJECT_UNLOCK(obj);
+ VM_OBJECT_WUNLOCK(obj);
} else
VI_UNLOCK(vp);
@@ -240,7 +240,7 @@
bp->b_bcount = bsize - blockoff * DEV_BSIZE;
bp->b_bufsize = bp->b_bcount;
- if (vmapbuf(bp) < 0)
+ if (vmapbuf(bp, 1) < 0)
return EFAULT;
maybe_yield();
@@ -259,7 +259,7 @@
bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
bp->b_bufsize = bp->b_bcount;
- if (vmapbuf(bp) < 0)
+ if (vmapbuf(bp, 1) < 0)
return EFAULT;
BO_STRATEGY(&dp->v_bufobj, bp);
@@ -275,7 +275,6 @@
struct buf *bp, *nbp, *tbp;
caddr_t sa, nsa, tsa;
u_int iolen;
- int spl;
caddr_t udata;
long resid;
off_t offset;
@@ -340,10 +339,7 @@
}
}
- spl = splbio();
bwait(bp, PRIBIO, "rawrd");
- splx(spl);
-
vunmapbuf(bp);
iolen = bp->b_bcount - bp->b_resid;
@@ -416,9 +412,7 @@
relpbuf(bp, &ffsrawbufcnt);
}
if (nbp != NULL) { /* Run down readahead buffer */
- spl = splbio();
bwait(nbp, PRIBIO, "rawrd");
- splx(spl);
vunmapbuf(nbp);
pbrelvp(nbp);
relpbuf(nbp, &ffsrawbufcnt);
Modified: trunk/sys/ufs/ffs/ffs_snapshot.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_snapshot.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_snapshot.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_snapshot.c 322132 2017-08-07 02:29:09Z mckusick $");
#include "opt_quota.h"
@@ -53,6 +54,7 @@
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
+#include <sys/rwlock.h>
#include <sys/vnode.h>
#include <geom/geom.h>
@@ -255,7 +257,8 @@
* Create the snapshot file.
*/
restart:
- NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, snapfile, td);
+ NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE, UIO_SYSSPACE,
+ snapfile, td);
if ((error = namei(&nd)) != 0)
return (error);
if (nd.ni_vp != NULL) {
@@ -422,7 +425,7 @@
*/
for (;;) {
vn_finished_write(wrtmp);
- if ((error = vfs_write_suspend(vp->v_mount)) != 0) {
+ if ((error = vfs_write_suspend(vp->v_mount, 0)) != 0) {
vn_start_write(NULL, &wrtmp, V_WAIT);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
goto out;
@@ -674,7 +677,8 @@
VI_LOCK(devvp);
fs->fs_snapinum[snaploc] = ip->i_number;
if (ip->i_nextsnap.tqe_prev != 0)
- panic("ffs_snapshot: %d already on list", ip->i_number);
+ panic("ffs_snapshot: %ju already on list",
+ (uintmax_t)ip->i_number);
TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
devvp->v_vflag |= VV_COPYONWRITE;
VI_UNLOCK(devvp);
@@ -686,7 +690,7 @@
/*
* Resume operation on filesystem.
*/
- vfs_write_resume_flags(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR);
+ vfs_write_resume(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR);
if (collectsnapstats && starttime.tv_sec > 0) {
nanotime(&endtime);
timespecsub(&endtime, &starttime);
@@ -790,7 +794,7 @@
brelse(nbp);
} else {
loc = blkoff(fs, fs->fs_sblockloc);
- bcopy((char *)copy_fs, &nbp->b_data[loc], fs->fs_bsize);
+ bcopy((char *)copy_fs, &nbp->b_data[loc], (u_int)fs->fs_sbsize);
bawrite(nbp);
}
/*
@@ -849,7 +853,7 @@
mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
MNT_IUNLOCK(mp);
if (error)
- (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, td);
+ (void) ffs_truncate(vp, (off_t)0, 0, NOCRED);
(void) ffs_syncvnode(vp, MNT_WAIT, 0);
if (error)
vput(vp);
@@ -1400,7 +1404,7 @@
*/
bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
bp->b_blkno = fsbtodb(fs, blkno);
- if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
+ if ((bp->b_flags & B_CACHE) == 0 &&
(error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
brelse(bp);
return (error);
@@ -1570,8 +1574,8 @@
if (xp != NULL)
vrele(ITOV(ip));
else if (snapdebug)
- printf("ffs_snapgone: lost snapshot vnode %d\n",
- ip->i_number);
+ printf("ffs_snapgone: lost snapshot vnode %ju\n",
+ (uintmax_t)ip->i_number);
/*
* Delete snapshot inode from superblock. Keep list dense.
*/
@@ -1604,7 +1608,7 @@
struct buf *ibp;
struct fs *fs;
ufs2_daddr_t numblks, blkno, dblk;
- int error, loc, last;
+ int error, i, last, loc;
struct snapdata *sn;
ip = VTOI(vp);
@@ -1624,10 +1628,14 @@
ip->i_nextsnap.tqe_prev = 0;
VI_UNLOCK(devvp);
lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
+ for (i = 0; i < sn->sn_lock.lk_recurse; i++)
+ lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
KASSERT(vp->v_vnlock == &sn->sn_lock,
("ffs_snapremove: lost lock mutation"));
vp->v_vnlock = &vp->v_lock;
VI_LOCK(devvp);
+ while (sn->sn_lock.lk_recurse > 0)
+ lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
try_free_snapdata(devvp);
} else
@@ -1741,7 +1749,7 @@
enum vtype vtype;
struct workhead *wkhd;
{
- struct buf *ibp, *cbp, *savedcbp = 0;
+ struct buf *ibp, *cbp, *savedcbp = NULL;
struct thread *td = curthread;
struct inode *ip;
struct vnode *vp = NULL;
@@ -1833,9 +1841,10 @@
if (size == fs->fs_bsize) {
#ifdef DEBUG
if (snapdebug)
- printf("%s %d lbn %jd from inum %d\n",
- "Grabonremove: snapino", ip->i_number,
- (intmax_t)lbn, inum);
+ printf("%s %ju lbn %jd from inum %ju\n",
+ "Grabonremove: snapino",
+ (uintmax_t)ip->i_number,
+ (intmax_t)lbn, (uintmax_t)inum);
#endif
/*
* If journaling is tracking this write we must add
@@ -1877,9 +1886,9 @@
break;
#ifdef DEBUG
if (snapdebug)
- printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n",
- "Copyonremove: snapino ", ip->i_number,
- (intmax_t)lbn, "for inum", inum, size,
+ printf("%s%ju lbn %jd %s %ju size %ld to blkno %jd\n",
+ "Copyonremove: snapino ", (uintmax_t)ip->i_number,
+ (intmax_t)lbn, "for inum", (uintmax_t)inum, size,
(intmax_t)cbp->b_blkno);
#endif
/*
@@ -1892,7 +1901,7 @@
* dopersistence sysctl-setable flag to decide on the
* persistence needed for file content data.
*/
- if (savedcbp != 0) {
+ if (savedcbp != NULL) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
if ((vtype == VDIR || dopersistence) &&
@@ -1936,7 +1945,7 @@
*/
if (error != 0 && wkhd != NULL)
softdep_freework(wkhd);
- lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
+ lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
return (error);
}
@@ -1988,7 +1997,7 @@
reason = "non-snapshot";
} else {
reason = "old format snapshot";
- (void)ffs_truncate(vp, (off_t)0, 0, NOCRED, td);
+ (void)ffs_truncate(vp, (off_t)0, 0, NOCRED);
(void)ffs_syncvnode(vp, MNT_WAIT, 0);
}
printf("ffs_snapshot_mount: %s inode %d\n",
@@ -2020,8 +2029,8 @@
*/
VI_LOCK(devvp);
if (ip->i_nextsnap.tqe_prev != 0)
- panic("ffs_snapshot_mount: %d already on list",
- ip->i_number);
+ panic("ffs_snapshot_mount: %ju already on list",
+ (uintmax_t)ip->i_number);
else
TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
vp->v_vflag |= VV_SYSTEM;
@@ -2202,10 +2211,8 @@
if (bp_bdskip) {
VI_LOCK(devvp);
if (!ffs_bp_snapblk(vp, nbp)) {
- if (BO_MTX(bo) != VI_MTX(vp)) {
- VI_UNLOCK(devvp);
- BO_LOCK(bo);
- }
+ VI_UNLOCK(devvp);
+ BO_LOCK(bo);
BUF_UNLOCK(nbp);
continue;
}
@@ -2235,11 +2242,11 @@
struct buf *bp;
{
struct snapdata *sn;
- struct buf *ibp, *cbp, *savedcbp = 0;
+ struct buf *ibp, *cbp, *savedcbp = NULL;
struct thread *td = curthread;
struct fs *fs;
struct inode *ip;
- struct vnode *vp = 0;
+ struct vnode *vp = NULL;
ufs2_daddr_t lbn, blkno, *snapblklist;
int lower, upper, mid, indiroff, error = 0;
int launched_async_io, prev_norunningbuf;
@@ -2365,12 +2372,13 @@
break;
#ifdef DEBUG
if (snapdebug) {
- printf("Copyonwrite: snapino %d lbn %jd for ",
- ip->i_number, (intmax_t)lbn);
+ printf("Copyonwrite: snapino %ju lbn %jd for ",
+ (uintmax_t)ip->i_number, (intmax_t)lbn);
if (bp->b_vp == devvp)
printf("fs metadata");
else
- printf("inum %d", VTOI(bp->b_vp)->i_number);
+ printf("inum %ju",
+ (uintmax_t)VTOI(bp->b_vp)->i_number);
printf(" lblkno %jd to blkno %jd\n",
(intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
}
@@ -2385,7 +2393,7 @@
* dopersistence sysctl-setable flag to decide on the
* persistence needed for file content data.
*/
- if (savedcbp != 0) {
+ if (savedcbp != NULL) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
@@ -2636,33 +2644,47 @@
static struct snapdata *
ffs_snapdata_acquire(struct vnode *devvp)
{
- struct snapdata *nsn;
- struct snapdata *sn;
+ struct snapdata *nsn, *sn;
+ int error;
/*
- * Allocate a free snapdata. This is done before acquiring the
+ * Allocate a free snapdata. This is done before acquiring the
* devvp lock to avoid allocation while the devvp interlock is
* held.
*/
nsn = ffs_snapdata_alloc();
- /*
- * If there snapshots already exist on this filesystem grab a
- * reference to the shared lock. Otherwise this is the first
- * snapshot on this filesystem and we need to use our
- * pre-allocated snapdata.
- */
- VI_LOCK(devvp);
- if (devvp->v_rdev->si_snapdata == NULL) {
- devvp->v_rdev->si_snapdata = nsn;
- nsn = NULL;
+
+ for (;;) {
+ VI_LOCK(devvp);
+ sn = devvp->v_rdev->si_snapdata;
+ if (sn == NULL) {
+ /*
+ * This is the first snapshot on this
+ * filesystem and we use our pre-allocated
+ * snapdata. Publish sn with the sn_lock
+ * owned by us, to avoid the race.
+ */
+ error = lockmgr(&nsn->sn_lock, LK_EXCLUSIVE |
+ LK_NOWAIT, NULL);
+ if (error != 0)
+ panic("leaked sn, lockmgr error %d", error);
+ sn = devvp->v_rdev->si_snapdata = nsn;
+ VI_UNLOCK(devvp);
+ nsn = NULL;
+ break;
+ }
+
+ /*
+ * A snapshot already exists on this filesystem;
+ * grab a reference to the common lock.
+ */
+ error = lockmgr(&sn->sn_lock, LK_INTERLOCK |
+ LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp));
+ if (error == 0)
+ break;
}
- sn = devvp->v_rdev->si_snapdata;
+
/*
- * Acquire the snapshot lock.
- */
- lockmgr(&sn->sn_lock,
- LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, VI_MTX(devvp));
- /*
* Free any unused snapdata.
*/
if (nsn != NULL)
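The rewritten ffs_snapdata_acquire() above replaces a single LK_RETRY acquisition with a publish-or-retry loop: either publish the preallocated snapdata with its lock already held (closing the race against other acquirers), or take the existing lock with LK_SLEEPFAIL so that any failed sleep restarts the loop and re-reads si_snapdata. Condensed shape of the pattern, with error handling elided:

    for (;;) {
            VI_LOCK(devvp);
            sn = devvp->v_rdev->si_snapdata;
            if (sn == NULL) {
                    /* publish nsn with sn_lock already owned */
                    lockmgr(&nsn->sn_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL);
                    sn = devvp->v_rdev->si_snapdata = nsn;
                    VI_UNLOCK(devvp);
                    break;
            }
            if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE |
                LK_SLEEPFAIL, VI_MTX(devvp)) == 0)
                    break;  /* lock held and sn still current */
    }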
Modified: trunk/sys/ufs/ffs/ffs_softdep.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_softdep.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_softdep.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 1998, 2000 Marshall Kirk McKusick.
* Copyright 2009, 2010 Jeffrey W. Roberson <jeff at FreeBSD.org>
@@ -40,7 +41,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_softdep.c 324612 2017-10-13 22:40:57Z jhb $");
#include "opt_ffs.h"
#include "opt_quota.h"
@@ -69,6 +70,7 @@
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -118,7 +120,7 @@
return (0);
}
-void
+void
softdep_initialize()
{
@@ -137,6 +139,7 @@
struct mount *mp;
{
+ panic("softdep_unmount called");
}
void
@@ -145,6 +148,8 @@
struct fs *fs;
struct buf *bp;
{
+
+ panic("softdep_setup_sbupdate called");
}
void
@@ -170,7 +175,7 @@
panic("softdep_setup_blkmapdep called");
}
-void
+void
softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
struct inode *ip;
ufs_lbn_t lbn;
@@ -184,7 +189,7 @@
panic("softdep_setup_allocdirect called");
}
-void
+void
softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
struct inode *ip;
ufs_lbn_t lbn;
@@ -263,7 +268,7 @@
panic("softdep_freefile called");
}
-int
+int
softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
struct buf *bp;
struct inode *dp;
@@ -276,7 +281,7 @@
panic("softdep_setup_directory_add called");
}
-void
+void
softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize)
struct buf *bp;
struct inode *dp;
@@ -289,7 +294,7 @@
panic("softdep_change_directoryentry_offset called");
}
-void
+void
softdep_setup_remove(bp, dp, ip, isrmdir)
struct buf *bp;
struct inode *dp;
@@ -300,7 +305,7 @@
panic("softdep_setup_remove called");
}
-void
+void
softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
struct buf *bp;
struct inode *dp;
@@ -432,8 +437,6 @@
{
panic("%s called", __FUNCTION__);
-
- return (0);
}
int
@@ -461,7 +464,7 @@
panic("softdep_load_inodeblock called");
}
-void
+void
softdep_update_inodeblock(ip, bp, waitfor)
struct inode *ip;
struct buf *bp;
@@ -502,7 +505,7 @@
softdep_sync_metadata(struct vnode *vp)
{
- return (0);
+ panic("softdep_sync_metadata called");
}
int
@@ -509,7 +512,7 @@
softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor)
{
- return (0);
+ panic("softdep_sync_buf called");
}
int
@@ -520,14 +523,6 @@
panic("softdep_slowdown called");
}
-void
-softdep_releasefile(ip)
- struct inode *ip; /* inode with the zero effective link count */
-{
-
- panic("softdep_releasefile called");
-}
-
int
softdep_request_cleanup(fs, vp, cred, resource)
struct fs *fs;
@@ -542,8 +537,8 @@
int
softdep_check_suspend(struct mount *mp,
struct vnode *devvp,
- int softdep_deps,
- int softdep_accdeps,
+ int softdep_depcnt,
+ int softdep_accdepcnt,
int secondary_writes,
int secondary_accwrites)
{
@@ -550,11 +545,11 @@
struct bufobj *bo;
int error;
- (void) softdep_deps,
- (void) softdep_accdeps;
+ (void) softdep_depcnt,
+ (void) softdep_accdepcnt;
bo = &devvp->v_bufobj;
- ASSERT_BO_LOCKED(bo);
+ ASSERT_BO_WLOCKED(bo);
MNT_ILOCK(mp);
while (mp->mnt_secondary_writes != 0) {
@@ -622,57 +617,22 @@
FEATURE(softupdates, "FFS soft-updates support");
-/*
- * These definitions need to be adapted to the system to which
- * this file is being ported.
- */
-
-#define M_SOFTDEP_FLAGS (M_WAITOK)
-
-#define D_PAGEDEP 0
-#define D_INODEDEP 1
-#define D_BMSAFEMAP 2
-#define D_NEWBLK 3
-#define D_ALLOCDIRECT 4
-#define D_INDIRDEP 5
-#define D_ALLOCINDIR 6
-#define D_FREEFRAG 7
-#define D_FREEBLKS 8
-#define D_FREEFILE 9
-#define D_DIRADD 10
-#define D_MKDIR 11
-#define D_DIRREM 12
-#define D_NEWDIRBLK 13
-#define D_FREEWORK 14
-#define D_FREEDEP 15
-#define D_JADDREF 16
-#define D_JREMREF 17
-#define D_JMVREF 18
-#define D_JNEWBLK 19
-#define D_JFREEBLK 20
-#define D_JFREEFRAG 21
-#define D_JSEG 22
-#define D_JSEGDEP 23
-#define D_SBDEP 24
-#define D_JTRUNC 25
-#define D_JFSYNC 26
-#define D_SENTINAL 27
-#define D_LAST D_SENTINAL
-
-unsigned long dep_current[D_LAST + 1];
-unsigned long dep_total[D_LAST + 1];
-unsigned long dep_write[D_LAST + 1];
-
-
static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0,
"soft updates stats");
static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0,
"total dependencies allocated");
+static SYSCTL_NODE(_debug_softdep, OID_AUTO, highuse, CTLFLAG_RW, 0,
+ "high use dependencies allocated");
static SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0,
"current dependencies allocated");
static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
"current dependencies written");
+unsigned long dep_current[D_LAST + 1];
+unsigned long dep_highuse[D_LAST + 1];
+unsigned long dep_total[D_LAST + 1];
+unsigned long dep_write[D_LAST + 1];
+
#define SOFTDEP_TYPE(type, str, long) \
static MALLOC_DEFINE(M_ ## type, #str, long); \
SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \
@@ -679,6 +639,8 @@
&dep_total[D_ ## type], 0, ""); \
SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \
&dep_current[D_ ## type], 0, ""); \
+ SYSCTL_ULONG(_debug_softdep_highuse, OID_AUTO, str, CTLFLAG_RD, \
+ &dep_highuse[D_ ## type], 0, ""); \
SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, \
&dep_write[D_ ## type], 0, "");
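Taking the SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation") invocation visible in the next hunk as an example, the macro above expands (modulo whitespace) to one malloc type plus four sysctl leaves, the new one being debug.softdep.highuse.jtrunc:

    static MALLOC_DEFINE(M_JTRUNC, "jtrunc", "Journal inode truncation");
    SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_total[D_JTRUNC], 0, "");
    SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_current[D_JTRUNC], 0, "");
    SYSCTL_ULONG(_debug_softdep_highuse, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_highuse[D_JTRUNC], 0, "");
    SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, jtrunc, CTLFLAG_RD,
        &dep_write[D_JTRUNC], 0, "");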
@@ -711,9 +673,14 @@
SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation");
SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete");
+static MALLOC_DEFINE(M_SENTINEL, "sentinel", "Worklist sentinel");
+
static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
+static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data");
+#define M_SOFTDEP_FLAGS (M_WAITOK)
+
/*
* translate from workitem type to memory type
* MUST match the defines above, such that memtype[D_XXX] == M_XXX
@@ -745,11 +712,10 @@
M_JSEGDEP,
M_SBDEP,
M_JTRUNC,
- M_JFSYNC
+ M_JFSYNC,
+ M_SENTINEL
};
-static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
-
#define DtoM(type) (memtype[type])
/*
@@ -765,49 +731,17 @@
#define DOT_OFFSET offsetof(struct dirtemplate, dot_ino)
/*
- * Forward declarations.
- */
-struct inodedep_hashhead;
-struct newblk_hashhead;
-struct pagedep_hashhead;
-struct bmsafemap_hashhead;
-
-/*
- * Private journaling structures.
- */
-struct jblocks {
- struct jseglst jb_segs; /* TAILQ of current segments. */
- struct jseg *jb_writeseg; /* Next write to complete. */
- struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */
- struct jextent *jb_extent; /* Extent array. */
- uint64_t jb_nextseq; /* Next sequence number. */
- uint64_t jb_oldestwrseq; /* Oldest written sequence number. */
- uint8_t jb_needseg; /* Need a forced segment. */
- uint8_t jb_suspended; /* Did journal suspend writes? */
- int jb_avail; /* Available extents. */
- int jb_used; /* Last used extent. */
- int jb_head; /* Allocator head. */
- int jb_off; /* Allocator extent offset. */
- int jb_blocks; /* Total disk blocks covered. */
- int jb_free; /* Total disk blocks free. */
- int jb_min; /* Minimum free space. */
- int jb_low; /* Low on space. */
- int jb_age; /* Insertion time of oldest rec. */
-};
-
-struct jextent {
- ufs2_daddr_t je_daddr; /* Disk block address. */
- int je_blocks; /* Disk block count. */
-};
-
-/*
* Internal function prototypes.
*/
+static void check_clear_deps(struct mount *);
static void softdep_error(char *, int);
+static int softdep_process_worklist(struct mount *, int);
+static int softdep_waitidle(struct mount *, int);
static void drain_output(struct vnode *);
-static struct buf *getdirtybuf(struct buf *, struct mtx *, int);
-static void clear_remove(struct thread *);
-static void clear_inodedeps(struct thread *);
+static struct buf *getdirtybuf(struct buf *, struct rwlock *, int);
+static int check_inodedep_free(struct inodedep *);
+static void clear_remove(struct mount *);
+static void clear_inodedeps(struct mount *);
static void unlinked_inodedep(struct mount *, struct inodedep *);
static void clear_unlinked_inodedep(struct inodedep *);
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
@@ -818,16 +752,16 @@
static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
static int flush_deplist(struct allocdirectlst *, int, int *);
static int sync_cgs(struct mount *, int);
-static int handle_written_filepage(struct pagedep *, struct buf *);
+static int handle_written_filepage(struct pagedep *, struct buf *, int);
static int handle_written_sbdep(struct sbdep *, struct buf *);
static void initiate_write_sbdep(struct sbdep *);
-static void diradd_inode_written(struct diradd *, struct inodedep *);
+static void diradd_inode_written(struct diradd *, struct inodedep *);
static int handle_written_indirdep(struct indirdep *, struct buf *,
- struct buf**);
-static int handle_written_inodeblock(struct inodedep *, struct buf *);
+ struct buf**, int);
+static int handle_written_inodeblock(struct inodedep *, struct buf *, int);
static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *,
uint8_t *);
-static int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
+static int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int);
static void handle_written_jaddref(struct jaddref *);
static void handle_written_jremref(struct jremref *);
static void handle_written_jseg(struct jseg *, struct buf *);
@@ -918,7 +852,7 @@
static inline struct freeblks *newfreeblks(struct mount *, struct inode *);
static void freeblks_free(struct ufsmount *, struct freeblks *, int);
static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t);
-ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
+static ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int);
static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t,
int, int);
@@ -951,22 +885,24 @@
struct allocdirect *, struct allocdirect *);
static struct freefrag *allocindir_merge(struct allocindir *,
struct allocindir *);
-static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int,
+static int bmsafemap_find(struct bmsafemap_hashhead *, int,
struct bmsafemap **);
static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *,
int cg, struct bmsafemap *);
-static int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t,
- int, struct newblk **);
+static int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int,
+ struct newblk **);
static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **);
-static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
+static int inodedep_find(struct inodedep_hashhead *, ino_t,
struct inodedep **);
static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
int, struct pagedep **);
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
- struct mount *mp, int, struct pagedep **);
+ struct pagedep **);
static void pause_timer(void *);
static int request_cleanup(struct mount *, int);
+static void schedule_cleanup(struct mount *);
+static void softdep_ast_cleanup_proc(void);
static int process_worklist_item(struct mount *, int, int);
static void process_removes(struct vnode *);
static void process_truncates(struct vnode *);
@@ -976,12 +912,12 @@
static void wake_worklist(struct worklist *);
static void wait_worklist(struct worklist *, char *);
static void remove_from_worklist(struct worklist *);
-static void softdep_flush(void);
+static void softdep_flush(void *);
static void softdep_flushjournal(struct mount *);
-static int softdep_speedup(void);
-static void worklist_speedup(void);
+static int softdep_speedup(struct ufsmount *);
+static void worklist_speedup(struct mount *);
static int journal_mount(struct mount *, struct fs *, struct ucred *);
-static void journal_unmount(struct mount *);
+static void journal_unmount(struct ufsmount *);
static int journal_space(struct ufsmount *, int);
static void journal_suspend(struct ufsmount *);
static int journal_unsuspend(struct ufsmount *ump);
@@ -988,6 +924,7 @@
static void softdep_prelink(struct vnode *, struct vnode *);
static void add_to_journal(struct worklist *);
static void remove_from_journal(struct worklist *);
+static bool softdep_excess_items(struct ufsmount *, int);
static void softdep_process_journal(struct mount *, struct worklist *, int);
static struct jremref *newjremref(struct dirrem *, struct inode *,
struct inode *ip, off_t, nlink_t);
@@ -999,6 +936,7 @@
static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
ufs2_daddr_t, int);
+static void adjust_newfreework(struct freeblks *, int);
static struct jtrunc *newjtrunc(struct freeblks *, off_t, int);
static void move_newblock_dep(struct jaddref *, struct inodedep *);
static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t);
@@ -1027,16 +965,29 @@
static void softdep_deallocate_dependencies(struct buf *);
static int softdep_count_dependencies(struct buf *bp, int);
+/*
+ * Global lock over all of soft updates.
+ */
static struct mtx lk;
-MTX_SYSINIT(softdep_lock, &lk, "Softdep Lock", MTX_DEF);
+MTX_SYSINIT(softdep_lock, &lk, "Global Softdep Lock", MTX_DEF);
-#define TRY_ACQUIRE_LOCK(lk) mtx_trylock(lk)
-#define ACQUIRE_LOCK(lk) mtx_lock(lk)
-#define FREE_LOCK(lk) mtx_unlock(lk)
+#define ACQUIRE_GBLLOCK(lk) mtx_lock(lk)
+#define FREE_GBLLOCK(lk) mtx_unlock(lk)
+#define GBLLOCK_OWNED(lk) mtx_assert((lk), MA_OWNED)
-#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock)
-#define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock)
+/*
+ * Per-filesystem soft-updates locking.
+ */
+#define LOCK_PTR(ump) (&(ump)->um_softdep->sd_fslock)
+#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock(&(ump)->um_softdep->sd_fslock)
+#define ACQUIRE_LOCK(ump) rw_wlock(&(ump)->um_softdep->sd_fslock)
+#define FREE_LOCK(ump) rw_wunlock(&(ump)->um_softdep->sd_fslock)
+#define LOCK_OWNED(ump) rw_assert(&(ump)->um_softdep->sd_fslock, \
+ RA_WLOCKED)
+#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock)
+#define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock)
+
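This split replaces the old single softdep mutex: lk now covers only state shared across filesystems (the dep_* statistics arrays and the softdepmounts list), while each mount's dependency structures are protected by its own rwlock. A minimal sketch of the resulting usage, mirroring workitem_alloc() further down in this patch (a sketch, not committed code):

    ACQUIRE_GBLLOCK(&lk);        /* global: dep_current[], dep_total[] */
    dep_total[type]++;
    FREE_GBLLOCK(&lk);
    ACQUIRE_LOCK(ump);           /* per-fs: worklists, hash tables */
    ump->softdep_deps++;
    FREE_LOCK(ump);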
/*
* Worklist queue management.
* These routines require that the lock be held.
@@ -1070,7 +1021,7 @@
{
if (locked)
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(item->wk_mp));
if (item->wk_state & ONWORKLIST)
panic("worklist_insert: %p %s(0x%X) already on list",
item, TYPENAME(item->wk_type), item->wk_state);
@@ -1085,7 +1036,7 @@
{
if (locked)
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(item->wk_mp));
if ((item->wk_state & ONWORKLIST) == 0)
panic("worklist_remove: %p %s(0x%X) not on list",
item, TYPENAME(item->wk_type), item->wk_state);
@@ -1158,7 +1109,6 @@
freedep = freedep_merge(WK_FREEDEP(wk), freedep);
}
- mtx_assert(&lk, MA_OWNED);
while ((wk = LIST_FIRST(src)) != NULL) {
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(dst, wk);
@@ -1200,8 +1150,12 @@
*/
static void workitem_free(struct worklist *, int);
static void workitem_alloc(struct worklist *, int, struct mount *);
+static void workitem_reassign(struct worklist *, int);
-#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)(item), (type))
+#define WORKITEM_FREE(item, type) \
+ workitem_free((struct worklist *)(item), (type))
+#define WORKITEM_REASSIGN(item, type) \
+ workitem_reassign((struct worklist *)(item), (type))
static void
workitem_free(item, type)
@@ -1209,13 +1163,12 @@
int type;
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
#ifdef DEBUG
if (item->wk_state & ONWORKLIST)
panic("workitem_free: %s(0x%X) still on list",
TYPENAME(item->wk_type), item->wk_state);
- if (item->wk_type != type)
+ if (item->wk_type != type && type != D_NEWBLK)
panic("workitem_free: type mismatch %s != %s",
TYPENAME(item->wk_type), TYPENAME(type));
#endif
@@ -1222,9 +1175,20 @@
if (item->wk_state & IOWAITING)
wakeup(item);
ump = VFSTOUFS(item->wk_mp);
+ LOCK_OWNED(ump);
+ KASSERT(ump->softdep_deps > 0,
+ ("workitem_free: %s: softdep_deps going negative",
+ ump->um_fs->fs_fsmnt));
if (--ump->softdep_deps == 0 && ump->softdep_req)
wakeup(&ump->softdep_deps);
- dep_current[type]--;
+ KASSERT(dep_current[item->wk_type] > 0,
+ ("workitem_free: %s: dep_current[%s] going negative",
+ ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+ ("workitem_free: %s: softdep_curdeps[%s] going negative",
+ ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ atomic_subtract_long(&dep_current[item->wk_type], 1);
+ ump->softdep_curdeps[item->wk_type] -= 1;
free(item, DtoM(type));
}
@@ -1241,24 +1205,54 @@
item->wk_state = 0;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_GBLLOCK(&lk);
dep_current[type]++;
+ if (dep_current[type] > dep_highuse[type])
+ dep_highuse[type] = dep_current[type];
dep_total[type]++;
+ FREE_GBLLOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ ump->softdep_curdeps[type] += 1;
ump->softdep_deps++;
ump->softdep_accdeps++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
+static void
+workitem_reassign(item, newtype)
+ struct worklist *item;
+ int newtype;
+{
+ struct ufsmount *ump;
+
+ ump = VFSTOUFS(item->wk_mp);
+ LOCK_OWNED(ump);
+ KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+ ("workitem_reassign: %s: softdep_curdeps[%s] going negative",
+ VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ ump->softdep_curdeps[item->wk_type] -= 1;
+ ump->softdep_curdeps[newtype] += 1;
+ KASSERT(dep_current[item->wk_type] > 0,
+ ("workitem_reassign: %s: dep_current[%s] going negative",
+ VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ ACQUIRE_GBLLOCK(&lk);
+ dep_current[newtype]++;
+ dep_current[item->wk_type]--;
+ if (dep_current[newtype] > dep_highuse[newtype])
+ dep_highuse[newtype] = dep_current[newtype];
+ dep_total[newtype]++;
+ FREE_GBLLOCK(&lk);
+ item->wk_type = newtype;
+}
+
/*
* Workitem queue management
*/
static int max_softdeps; /* maximum number of structs before slowdown */
-static int maxindirdeps = 50; /* max number of indirdeps before slowdown */
static int tickdelay = 2; /* number of ticks to pause during slowdown */
static int proc_waiting; /* tracks whether we have a timeout posted */
static int *stat_countp; /* statistic to count in proc_waiting timeout */
static struct callout softdep_callout;
-static int req_pending;
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
static int req_clear_remove; /* syncer process flush some freeblks */
static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1266,6 +1260,7 @@
/*
* runtime statistics
*/
+static int stat_flush_threads; /* number of softdep flushing threads */
static int stat_worklist_push; /* number of worklist cleanups */
static int stat_blk_limit_push; /* number of times block limit neared */
static int stat_ino_limit_push; /* number of times inode limit neared */
@@ -1290,13 +1285,14 @@
static int stat_cleanup_inorequests; /* Number of inode cleanup requests */
static int stat_cleanup_retries; /* Number of cleanups that needed to flush */
static int stat_cleanup_failures; /* Number of cleanup requests that failed */
+static int stat_emptyjblocks; /* Number of potentially empty journal blocks */
SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
&max_softdeps, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW,
&tickdelay, 0, "");
-SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW,
- &maxindirdeps, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, flush_threads, CTLFLAG_RD,
+ &stat_flush_threads, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW,
&stat_worklist_push, 0,"");
SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW,
@@ -1347,103 +1343,146 @@
&stat_cleanup_failures, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW,
&softdep_flushcache, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, emptyjblocks, CTLFLAG_RD,
+ &stat_emptyjblocks, 0, "");
SYSCTL_DECL(_vfs_ffs);
-LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl;
-static u_long bmsafemap_hash; /* size of hash table - 1 */
-
-static int compute_summary_at_mount = 0; /* Whether to recompute the summary at mount time */
+/* Whether to recompute the summary at mount time */
+static int compute_summary_at_mount = 0;
SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
&compute_summary_at_mount, 0, "Recompute summary at mount");
+static int print_threads = 0;
+SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW,
+ &print_threads, 0, "Notify flusher thread start/stop");
-static struct proc *softdepproc;
-static struct kproc_desc softdep_kp = {
- "softdepflush",
- softdep_flush,
- &softdepproc
-};
-SYSINIT(sdproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
- &softdep_kp);
+/* List of all filesystems mounted with soft updates */
+static TAILQ_HEAD(, mount_softdeps) softdepmounts;
+/*
+ * This function cleans the worklist for a filesystem.
+ * Each filesystem running with soft dependencies gets its own
+ * thread to run in this function. The thread is started up in
+ * softdep_mount and shut down in softdep_unmount. They show up

+ * as part of the kernel "bufdaemon" process whose process
+ * entry is available in bufdaemonproc.
+ */
+static int searchfailed;
+extern struct proc *bufdaemonproc;
static void
-softdep_flush(void)
+softdep_flush(addr)
+ void *addr;
{
- struct mount *nmp;
struct mount *mp;
+ struct thread *td;
struct ufsmount *ump;
- struct thread *td;
- int remaining;
- int progress;
- int vfslocked;
td = curthread;
td->td_pflags |= TDP_NORUNNINGBUF;
-
+ mp = (struct mount *)addr;
+ ump = VFSTOUFS(mp);
+ atomic_add_int(&stat_flush_threads, 1);
+ ACQUIRE_LOCK(ump);
+ ump->softdep_flags &= ~FLUSH_STARTING;
+ wakeup(&ump->softdep_flushtd);
+ FREE_LOCK(ump);
+ if (print_threads) {
+ if (stat_flush_threads == 1)
+ printf("Running %s at pid %d\n", bufdaemonproc->p_comm,
+ bufdaemonproc->p_pid);
+ printf("Start thread %s\n", td->td_name);
+ }
for (;;) {
- kproc_suspend_check(softdepproc);
- vfslocked = VFS_LOCK_GIANT((struct mount *)NULL);
- ACQUIRE_LOCK(&lk);
+ while (softdep_process_worklist(mp, 0) > 0 ||
+ (MOUNTEDSUJ(mp) &&
+ VFSTOUFS(mp)->softdep_jblocks->jb_suspended))
+ kthread_suspend_check();
+ ACQUIRE_LOCK(ump);
+ if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
+ msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM,
+ "sdflush", hz / 2);
+ ump->softdep_flags &= ~FLUSH_CLEANUP;
/*
- * If requested, try removing inode or removal dependencies.
+ * Check to see if we are done and need to exit.
*/
- if (req_clear_inodedeps) {
- clear_inodedeps(td);
- req_clear_inodedeps -= 1;
- wakeup_one(&proc_waiting);
+ if ((ump->softdep_flags & FLUSH_EXIT) == 0) {
+ FREE_LOCK(ump);
+ continue;
}
- if (req_clear_remove) {
- clear_remove(td);
- req_clear_remove -= 1;
- wakeup_one(&proc_waiting);
- }
- FREE_LOCK(&lk);
- VFS_UNLOCK_GIANT(vfslocked);
- remaining = progress = 0;
- mtx_lock(&mountlist_mtx);
- for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
- nmp = TAILQ_NEXT(mp, mnt_list);
- if (MOUNTEDSOFTDEP(mp) == 0)
- continue;
- if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
- continue;
- vfslocked = VFS_LOCK_GIANT(mp);
- progress += softdep_process_worklist(mp, 0);
- ump = VFSTOUFS(mp);
- remaining += ump->softdep_on_worklist;
- VFS_UNLOCK_GIANT(vfslocked);
- mtx_lock(&mountlist_mtx);
- nmp = TAILQ_NEXT(mp, mnt_list);
- vfs_unbusy(mp);
- }
- mtx_unlock(&mountlist_mtx);
- if (remaining && progress)
- continue;
- ACQUIRE_LOCK(&lk);
- if (!req_pending)
- msleep(&req_pending, &lk, PVM, "sdflush", hz);
- req_pending = 0;
- FREE_LOCK(&lk);
+ ump->softdep_flags &= ~FLUSH_EXIT;
+ FREE_LOCK(ump);
+ wakeup(&ump->softdep_flags);
+ if (print_threads)
+ printf("Stop thread %s: searchfailed %d, did cleanups %d\n", td->td_name, searchfailed, ump->um_softdep->sd_cleanups);
+ atomic_subtract_int(&stat_flush_threads, 1);
+ kthread_exit();
+ panic("kthread_exit failed\n");
}
}
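A condensed view of the handshake between this thread and the mount/unmount paths shown later in this patch (flag and sleep-channel names are from the code; this is a sketch, not the code itself):

    softdep_mount():   set FLUSH_STARTING; kproc_kthread_add(softdep_flush, ...);
                       msleep(&ump->softdep_flushtd) until FLUSH_STARTING clears
    softdep_flush():   clear FLUSH_STARTING; wakeup(&ump->softdep_flushtd);
                       loop: drain worklist, then msleep(&ump->softdep_flushtd)
                       until FLUSH_CLEANUP or FLUSH_EXIT is set
    softdep_unmount(): set FLUSH_EXIT; wakeup(&ump->softdep_flushtd);
                       msleep(&ump->softdep_flags) until the thread clears
                       FLUSH_EXIT and calls kthread_exit()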
static void
-worklist_speedup(void)
+worklist_speedup(mp)
+ struct mount *mp;
{
- mtx_assert(&lk, MA_OWNED);
- if (req_pending == 0) {
- req_pending = 1;
- wakeup(&req_pending);
- }
+ struct ufsmount *ump;
+
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
+ ump->softdep_flags |= FLUSH_CLEANUP;
+ wakeup(&ump->softdep_flushtd);
}
static int
-softdep_speedup(void)
+softdep_speedup(ump)
+ struct ufsmount *ump;
{
+ struct ufsmount *altump;
+ struct mount_softdeps *sdp;
- worklist_speedup();
+ LOCK_OWNED(ump);
+ worklist_speedup(ump->um_mountp);
bd_speedup();
- return speedup_syncer();
+ /*
+ * If we have global shortages, then we need other
+ * filesystems to help with the cleanup. Here we wakeup a
+ * flusher thread for a filesystem that is over its fair
+ * share of resources.
+ */
+ if (req_clear_inodedeps || req_clear_remove) {
+ ACQUIRE_GBLLOCK(&lk);
+ TAILQ_FOREACH(sdp, &softdepmounts, sd_next) {
+ if ((altump = sdp->sd_ump) == ump)
+ continue;
+ if (((req_clear_inodedeps &&
+ altump->softdep_curdeps[D_INODEDEP] >
+ max_softdeps / stat_flush_threads) ||
+ (req_clear_remove &&
+ altump->softdep_curdeps[D_DIRREM] >
+ (max_softdeps / 2) / stat_flush_threads)) &&
+ TRY_ACQUIRE_LOCK(altump))
+ break;
+ }
+ if (sdp == NULL) {
+ searchfailed++;
+ FREE_GBLLOCK(&lk);
+ } else {
+ /*
+ * Move to the end of the list so we pick a
+ * different one on our next try.
+ */
+ TAILQ_REMOVE(&softdepmounts, sdp, sd_next);
+ TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
+ FREE_GBLLOCK(&lk);
+ if ((altump->softdep_flags &
+ (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
+ altump->softdep_flags |= FLUSH_CLEANUP;
+ altump->um_softdep->sd_cleanups++;
+ wakeup(&altump->softdep_flushtd);
+ FREE_LOCK(altump);
+ }
+ }
+ return (speedup_syncer());
}
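To make the fair-share test concrete, a worked example with assumed numbers: if max_softdeps is 1,000,000 and stat_flush_threads is 4, another filesystem's flusher is woken for an inodedep shortage only once that filesystem holds more than 1,000,000 / 4 = 250,000 inodedeps, and for a removal shortage only once it holds more than (1,000,000 / 2) / 4 = 125,000 dirrems.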
/*
@@ -1464,8 +1503,8 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
+ LOCK_OWNED(ump);
if (wk->wk_state & ONWORKLIST)
panic("add_to_worklist: %s(0x%X) already on list",
TYPENAME(wk->wk_type), wk->wk_state);
@@ -1481,7 +1520,7 @@
}
ump->softdep_on_worklist += 1;
if (flags & WK_NODELAY)
- worklist_speedup();
+ worklist_speedup(wk->wk_mp);
}
/*
@@ -1517,9 +1556,11 @@
struct worklist *wk;
char *wmesg;
{
+ struct ufsmount *ump;
+ ump = VFSTOUFS(wk->wk_mp);
wk->wk_state |= IOWAITING;
- msleep(wk, &lk, PVM, wmesg, 0);
+ msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0);
}
/*
@@ -1531,58 +1572,44 @@
* ordering ensures that no new <vfsid, inum, lbn> triples will be generated
* until all the old ones have been purged from the dependency lists.
*/
-int
+static int
softdep_process_worklist(mp, full)
struct mount *mp;
int full;
{
- struct thread *td = curthread;
int cnt, matchcnt;
struct ufsmount *ump;
long starttime;
KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp"));
- /*
- * Record the process identifier of our caller so that we can give
- * this process preferential treatment in request_cleanup below.
- */
+ if (MOUNTEDSOFTDEP(mp) == 0)
+ return (0);
matchcnt = 0;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
starttime = time_second;
- softdep_process_journal(mp, NULL, full?MNT_WAIT:0);
+ softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0);
+ check_clear_deps(mp);
while (ump->softdep_on_worklist > 0) {
if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0)
break;
else
matchcnt += cnt;
+ check_clear_deps(mp);
/*
- * If requested, try removing inode or removal dependencies.
- */
- if (req_clear_inodedeps) {
- clear_inodedeps(td);
- req_clear_inodedeps -= 1;
- wakeup_one(&proc_waiting);
- }
- if (req_clear_remove) {
- clear_remove(td);
- req_clear_remove -= 1;
- wakeup_one(&proc_waiting);
- }
- /*
* We do not generally want to stop for buffer space, but if
* we are really being a buffer hog, we will stop and wait.
*/
if (should_yield()) {
- FREE_LOCK(&lk);
- kern_yield(PRI_UNCHANGED);
+ FREE_LOCK(ump);
+ kern_yield(PRI_USER);
bwillwrite();
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
* Never allow processing to run for more than one
- * second. Otherwise the other mountpoints may get
- * excessively backlogged.
+ * second. This gives the syncer thread the opportunity
+ * to pause if appropriate.
*/
if (!full && starttime != time_second)
break;
@@ -1589,7 +1616,7 @@
}
if (full == 0)
journal_unsuspend(ump);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (matchcnt);
}
@@ -1604,12 +1631,13 @@
{
struct inodedep *inodedep;
struct dirrem *dirrem;
+ struct ufsmount *ump;
struct mount *mp;
ino_t inum;
- mtx_assert(&lk, MA_OWNED);
-
mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
inum = VTOI(vp)->i_number;
for (;;) {
top:
@@ -1632,12 +1660,12 @@
if (dirrem == NULL)
return;
remove_from_worklist(&dirrem->dm_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
panic("process_removes: suspended filesystem");
handle_workitem_remove(dirrem, 0);
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
}
@@ -1653,13 +1681,14 @@
{
struct inodedep *inodedep;
struct freeblks *freeblks;
+ struct ufsmount *ump;
struct mount *mp;
ino_t inum;
int cgwait;
- mtx_assert(&lk, MA_OWNED);
-
mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
inum = VTOI(vp)->i_number;
for (;;) {
if (inodedep_lookup(mp, inum, 0, &inodedep) == 0)
@@ -1680,9 +1709,9 @@
}
/* Freeblks is waiting on an inode write. */
if ((freeblks->fb_state & COMPLETE) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
ffs_update(vp, 1);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) ==
@@ -1689,7 +1718,7 @@
(ALLCOMPLETE | ONWORKLIST)) {
remove_from_worklist(&freeblks->fb_list);
freeblks->fb_state |= INPROGRESS;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp,
V_NOWAIT))
panic("process_truncates: "
@@ -1696,7 +1725,7 @@
"suspended filesystem");
handle_workitem_freeblocks(freeblks, 0);
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
if (freeblks->fb_cgwait)
@@ -1703,10 +1732,10 @@
cgwait++;
}
if (cgwait) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
sync_cgs(mp, MNT_WAIT);
ffs_sync_snap(mp, MNT_WAIT);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
}
if (freeblks == NULL)
@@ -1724,13 +1753,12 @@
int target;
int flags;
{
- struct worklist sintenel;
+ struct worklist sentinel;
struct worklist *wk;
struct ufsmount *ump;
int matchcnt;
int error;
- mtx_assert(&lk, MA_OWNED);
KASSERT(mp != NULL, ("process_worklist_item: NULL mp"));
/*
* If we are being called because of a process doing a
@@ -1741,15 +1769,16 @@
return (-1);
PHOLD(curproc); /* Don't let the stack go away. */
ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
matchcnt = 0;
- sintenel.wk_mp = NULL;
- sintenel.wk_type = D_SENTINAL;
- LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sintenel, wk_list);
- for (wk = LIST_NEXT(&sintenel, wk_list); wk != NULL;
- wk = LIST_NEXT(&sintenel, wk_list)) {
- if (wk->wk_type == D_SENTINAL) {
- LIST_REMOVE(&sintenel, wk_list);
- LIST_INSERT_AFTER(wk, &sintenel, wk_list);
+ sentinel.wk_mp = NULL;
+ sentinel.wk_type = D_SENTINEL;
+ LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sentinel, wk_list);
+ for (wk = LIST_NEXT(&sentinel, wk_list); wk != NULL;
+ wk = LIST_NEXT(&sentinel, wk_list)) {
+ if (wk->wk_type == D_SENTINEL) {
+ LIST_REMOVE(&sentinel, wk_list);
+ LIST_INSERT_AFTER(wk, &sentinel, wk_list);
continue;
}
if (wk->wk_state & INPROGRESS)
@@ -1757,7 +1786,7 @@
wk);
wk->wk_state |= INPROGRESS;
remove_from_worklist(wk);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
panic("process_worklist_item: suspended filesystem");
switch (wk->wk_type) {
@@ -1790,7 +1819,7 @@
/* NOTREACHED */
}
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (error == 0) {
if (++matchcnt == target)
break;
@@ -1806,11 +1835,11 @@
wake_worklist(wk);
add_to_worklist(wk, WK_HEAD);
}
- LIST_REMOVE(&sintenel, wk_list);
+ LIST_REMOVE(&sentinel, wk_list);
/* Sentinel could've become the tail from remove_from_worklist. */
- if (ump->softdep_worklist_tail == &sintenel)
+ if (ump->softdep_worklist_tail == &sentinel)
ump->softdep_worklist_tail =
- (struct worklist *)sintenel.wk_list.le_prev;
+ (struct worklist *)sentinel.wk_list.le_prev;
PRELE(curproc);
return (matchcnt);
}
@@ -1824,23 +1853,29 @@
struct buf *newbp;
{
struct worklist *wk, *wktail;
+ struct ufsmount *ump;
int dirty;
+ if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL)
+ return (0);
+ KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
+ ("softdep_move_dependencies called on non-softdep filesystem"));
dirty = 0;
wktail = NULL;
- ACQUIRE_LOCK(&lk);
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
LIST_REMOVE(wk, wk_list);
if (wk->wk_type == D_BMSAFEMAP &&
bmsafemap_backgroundwrite(WK_BMSAFEMAP(wk), newbp))
dirty = 1;
- if (wktail == 0)
+ if (wktail == NULL)
LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
else
LIST_INSERT_AFTER(wktail, wk, wk_list);
wktail = wk;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (dirty);
}
@@ -1855,8 +1890,8 @@
struct thread *td;
{
struct vnode *devvp;
- int count, error = 0;
struct ufsmount *ump;
+ int count, error;
/*
* Alternately flush the block device associated with the mount
@@ -1865,6 +1900,7 @@
* are found.
*/
*countp = 0;
+ error = 0;
ump = VFSTOUFS(oldmnt);
devvp = ump->um_devvp;
while ((count = softdep_process_worklist(oldmnt, 1)) > 0) {
@@ -1872,36 +1908,47 @@
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(devvp, MNT_WAIT, td);
VOP_UNLOCK(devvp, 0);
- if (error)
+ if (error != 0)
break;
}
return (error);
}
-int
-softdep_waitidle(struct mount *mp)
+#define SU_WAITIDLE_RETRIES 20
+static int
+softdep_waitidle(struct mount *mp, int flags __unused)
{
struct ufsmount *ump;
- int error;
- int i;
+ struct vnode *devvp;
+ struct thread *td;
+ int error, i;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
- for (i = 0; i < 10 && ump->softdep_deps; i++) {
+ devvp = ump->um_devvp;
+ td = curthread;
+ error = 0;
+ ACQUIRE_LOCK(ump);
+ for (i = 0; i < SU_WAITIDLE_RETRIES && ump->softdep_deps != 0; i++) {
ump->softdep_req = 1;
- if (ump->softdep_on_worklist)
- panic("softdep_waitidle: work added after flush.");
- msleep(&ump->softdep_deps, &lk, PVM, "softdeps", 1);
+ KASSERT((flags & FORCECLOSE) == 0 ||
+ ump->softdep_on_worklist == 0,
+ ("softdep_waitidle: work added after flush"));
+ msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM | PDROP,
+ "softdeps", 10 * hz);
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+ error = VOP_FSYNC(devvp, MNT_WAIT, td);
+ VOP_UNLOCK(devvp, 0);
+ ACQUIRE_LOCK(ump);
+ if (error != 0)
+ break;
}
ump->softdep_req = 0;
- FREE_LOCK(&lk);
- error = 0;
- if (i == 10) {
+ if (i == SU_WAITIDLE_RETRIES && error == 0 && ump->softdep_deps != 0) {
error = EBUSY;
printf("softdep_waitidle: Failed to flush worklist for %p\n",
mp);
}
-
+ FREE_LOCK(ump);
return (error);
}
@@ -1921,6 +1968,8 @@
int error, early, depcount, loopcnt, retry_flush_count, retry;
int morework;
+ KASSERT(MOUNTEDSOFTDEP(oldmnt) != 0,
+ ("softdep_flushfiles called on non-softdep filesystem"));
loopcnt = 10;
retry_flush_count = 3;
retry_flush:
@@ -1956,7 +2005,7 @@
error = EBUSY;
}
if (!error)
- error = softdep_waitidle(oldmnt);
+ error = softdep_waitidle(oldmnt, flags);
if (!error) {
if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) {
retry = 0;
@@ -1991,12 +2040,14 @@
/*
* Structure hashing.
*
- * There are three types of structures that can be looked up:
+ * There are four types of structures that can be looked up:
* 1) pagedep structures identified by mount point, inode number,
* and logical block.
* 2) inodedep structures identified by mount point and inode number.
* 3) newblk structures identified by mount point and
* physical block number.
+ * 4) bmsafemap structures identified by mount point and
+ * cylinder group number.
*
* The "pagedep" and "inodedep" dependency structures are hashed
* separately from the file blocks and inodes to which they correspond.
@@ -2008,34 +2059,28 @@
* their allocdirect or allocindir structure.
*
* The lookup routines optionally create and hash a new instance when
- * an existing entry is not found.
+ * an existing entry is not found. The bmsafemap lookup routine always
+ * allocates a new structure if an existing one is not found.
*/
#define DEPALLOC 0x0001 /* allocate structure if lookup fails */
-#define NODELAY 0x0002 /* cannot do background work */
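All of the optionally-creating lookup routines below (pagedep, inodedep, newblk) share the same race-tolerant shape now that allocation happens outside the per-filesystem lock. A schematic sketch with placeholder names (xxx_find, M_XXX, D_XXX and wk_list are stand-ins, not real identifiers):

    /* caller holds the per-filesystem lock */
    if (xxx_find(hashhead, key, itempp))
        return (1);                 /* fast path: already hashed */
    if ((flags & DEPALLOC) == 0)
        return (0);                 /* caller did not ask to create */
    FREE_LOCK(ump);                 /* never malloc with the lock held */
    item = malloc(sizeof(*item), M_XXX, M_SOFTDEP_FLAGS);
    workitem_alloc(&item->wk_list, D_XXX, mp);
    ACQUIRE_LOCK(ump);
    if (xxx_find(hashhead, key, itempp)) {
        WORKITEM_FREE(item, D_XXX); /* lost the race; free ours */
        return (1);
    }
    LIST_INSERT_HEAD(hashhead, item, hash); /* publish under the lock */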
/*
* Structures and routines associated with pagedep caching.
*/
-LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
-u_long pagedep_hash; /* size of hash table - 1 */
-#define PAGEDEP_HASH(mp, inum, lbn) \
- (&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
- pagedep_hash])
+#define PAGEDEP_HASH(ump, inum, lbn) \
+ (&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size])
static int
-pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp)
+pagedep_find(pagedephd, ino, lbn, pagedeppp)
struct pagedep_hashhead *pagedephd;
ino_t ino;
ufs_lbn_t lbn;
- struct mount *mp;
- int flags;
struct pagedep **pagedeppp;
{
struct pagedep *pagedep;
LIST_FOREACH(pagedep, pagedephd, pd_hash) {
- if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn &&
- mp == pagedep->pd_list.wk_mp) {
+ if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) {
*pagedeppp = pagedep;
return (1);
}
@@ -2061,10 +2106,12 @@
struct pagedep *pagedep;
struct pagedep_hashhead *pagedephd;
struct worklist *wk;
+ struct ufsmount *ump;
int ret;
int i;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
if (bp) {
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
if (wk->wk_type == D_PAGEDEP) {
@@ -2073,8 +2120,8 @@
}
}
}
- pagedephd = PAGEDEP_HASH(mp, ino, lbn);
- ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+ pagedephd = PAGEDEP_HASH(ump, ino, lbn);
+ ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
if (ret) {
if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp)
WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list);
@@ -2082,12 +2129,12 @@
}
if ((flags & DEPALLOC) == 0)
return (0);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
pagedep = malloc(sizeof(struct pagedep),
M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO);
workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp);
- ACQUIRE_LOCK(&lk);
- ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+ ACQUIRE_LOCK(ump);
+ ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
if (*pagedeppp) {
/*
* This should never happen since we only create pagedeps
@@ -2111,15 +2158,12 @@
/*
* Structures and routines associated with inodedep caching.
*/
-LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
-static u_long inodedep_hash; /* size of hash table - 1 */
-#define INODEDEP_HASH(fs, inum) \
- (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
+#define INODEDEP_HASH(ump, inum) \
+ (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size])
static int
-inodedep_find(inodedephd, fs, inum, inodedeppp)
+inodedep_find(inodedephd, inum, inodedeppp)
struct inodedep_hashhead *inodedephd;
- struct fs *fs;
ino_t inum;
struct inodedep **inodedeppp;
{
@@ -2126,7 +2170,7 @@
struct inodedep *inodedep;
LIST_FOREACH(inodedep, inodedephd, id_hash)
- if (inum == inodedep->id_ino && fs == inodedep->id_fs)
+ if (inum == inodedep->id_ino)
break;
if (inodedep) {
*inodedeppp = inodedep;
@@ -2151,27 +2195,32 @@
{
struct inodedep *inodedep;
struct inodedep_hashhead *inodedephd;
+ struct ufsmount *ump;
struct fs *fs;
- mtx_assert(&lk, MA_OWNED);
- fs = VFSTOUFS(mp)->um_fs;
- inodedephd = INODEDEP_HASH(fs, inum);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ fs = ump->um_fs;
+ inodedephd = INODEDEP_HASH(ump, inum);
- if (inodedep_find(inodedephd, fs, inum, inodedeppp))
+ if (inodedep_find(inodedephd, inum, inodedeppp))
return (1);
if ((flags & DEPALLOC) == 0)
return (0);
/*
- * If we are over our limit, try to improve the situation.
+ * If the system is over its limit and our filesystem is
+ * responsible for more than our share of that usage and
+ * we are not in a rush, request some inodedep cleanup.
*/
- if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0)
- request_cleanup(mp, FLUSH_INODES);
- FREE_LOCK(&lk);
+ if (softdep_excess_items(ump, D_INODEDEP))
+ schedule_cleanup(mp);
+ else
+ FREE_LOCK(ump);
inodedep = malloc(sizeof(struct inodedep),
M_INODEDEP, M_SOFTDEP_FLAGS);
workitem_alloc(&inodedep->id_list, D_INODEDEP, mp);
- ACQUIRE_LOCK(&lk);
- if (inodedep_find(inodedephd, fs, inum, inodedeppp)) {
+ ACQUIRE_LOCK(ump);
+ if (inodedep_find(inodedephd, inum, inodedeppp)) {
WORKITEM_FREE(inodedep, D_INODEDEP);
return (1);
}
@@ -2203,15 +2252,12 @@
/*
* Structures and routines associated with newblk caching.
*/
-LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
-u_long newblk_hash; /* size of hash table - 1 */
-#define NEWBLK_HASH(fs, inum) \
- (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
+#define NEWBLK_HASH(ump, inum) \
+ (&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size])
static int
-newblk_find(newblkhd, mp, newblkno, flags, newblkpp)
+newblk_find(newblkhd, newblkno, flags, newblkpp)
struct newblk_hashhead *newblkhd;
- struct mount *mp;
ufs2_daddr_t newblkno;
int flags;
struct newblk **newblkpp;
@@ -2221,8 +2267,6 @@
LIST_FOREACH(newblk, newblkhd, nb_hash) {
if (newblkno != newblk->nb_newblkno)
continue;
- if (mp != newblk->nb_list.wk_mp)
- continue;
/*
* If we're creating a new dependency don't match those that
* have already been converted to allocdirects. This is for
@@ -2254,18 +2298,26 @@
{
struct newblk *newblk;
struct newblk_hashhead *newblkhd;
+ struct ufsmount *ump;
- newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno);
- if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp))
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ newblkhd = NEWBLK_HASH(ump, newblkno);
+ if (newblk_find(newblkhd, newblkno, flags, newblkpp))
return (1);
if ((flags & DEPALLOC) == 0)
return (0);
- FREE_LOCK(&lk);
+ if (softdep_excess_items(ump, D_NEWBLK) ||
+ softdep_excess_items(ump, D_ALLOCDIRECT) ||
+ softdep_excess_items(ump, D_ALLOCINDIR))
+ schedule_cleanup(mp);
+ else
+ FREE_LOCK(ump);
newblk = malloc(sizeof(union allblk), M_NEWBLK,
M_SOFTDEP_FLAGS | M_ZERO);
workitem_alloc(&newblk->nb_list, D_NEWBLK, mp);
- ACQUIRE_LOCK(&lk);
- if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) {
+ ACQUIRE_LOCK(ump);
+ if (newblk_find(newblkhd, newblkno, flags, newblkpp)) {
WORKITEM_FREE(newblk, D_NEWBLK);
return (1);
}
@@ -2283,10 +2335,8 @@
/*
* Structures and routines associated with freed indirect block caching.
*/
-struct freeworklst *indir_hashtbl;
-u_long indir_hash; /* size of hash table - 1 */
-#define INDIR_HASH(mp, blkno) \
- (&indir_hashtbl[((((register_t)(mp)) >> 13) + (blkno)) & indir_hash])
+#define INDIR_HASH(ump, blkno) \
+ (&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size])
/*
* Lookup an indirect block in the indir hash table. The freework is
@@ -2299,14 +2349,14 @@
ufs2_daddr_t blkno;
{
struct freework *freework;
- struct freeworklst *wkhd;
+ struct indir_hashhead *wkhd;
+ struct ufsmount *ump;
- wkhd = INDIR_HASH(mp, blkno);
+ ump = VFSTOUFS(mp);
+ wkhd = INDIR_HASH(ump, blkno);
TAILQ_FOREACH(freework, wkhd, fw_next) {
if (freework->fw_blkno != blkno)
continue;
- if (freework->fw_list.wk_mp != mp)
- continue;
indirblk_remove(freework);
return (1);
}
@@ -2324,15 +2374,17 @@
{
struct jblocks *jblocks;
struct jseg *jseg;
+ struct ufsmount *ump;
- jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
+ jblocks = ump->softdep_jblocks;
jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst);
if (jseg == NULL)
return;
LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs);
- TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp,
- freework->fw_blkno), freework, fw_next);
+ TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework,
+ fw_next);
freework->fw_state &= ~DEPCOMPLETE;
}
@@ -2340,10 +2392,11 @@
indirblk_remove(freework)
struct freework *freework;
{
+ struct ufsmount *ump;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
LIST_REMOVE(freework, fw_segs);
- TAILQ_REMOVE(INDIR_HASH(freework->fw_list.wk_mp,
- freework->fw_blkno), freework, fw_next);
+ TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next);
freework->fw_state |= DEPCOMPLETE;
if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
WORKITEM_FREE(freework, D_FREEWORK);
@@ -2356,20 +2409,13 @@
void
softdep_initialize()
{
- int i;
- LIST_INIT(&mkdirlisthd);
+ TAILQ_INIT(&softdepmounts);
+#ifdef __LP64__
max_softdeps = desiredvnodes * 4;
- pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash);
- inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash);
- newblk_hashtbl = hashinit(desiredvnodes / 5, M_NEWBLK, &newblk_hash);
- bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash);
- i = 1 << (ffs(desiredvnodes / 10) - 1);
- indir_hashtbl = malloc(i * sizeof(indir_hashtbl[0]), M_FREEWORK,
- M_WAITOK);
- indir_hash = i - 1;
- for (i = 0; i <= indir_hash; i++)
- TAILQ_INIT(&indir_hashtbl[i]);
+#else
+ max_softdeps = desiredvnodes * 2;
+#endif
/* initialise bioops hack */
bioops.io_start = softdep_disk_io_initiation;
@@ -2376,6 +2422,7 @@
bioops.io_complete = softdep_disk_write_complete;
bioops.io_deallocate = softdep_deallocate_dependencies;
bioops.io_countdeps = softdep_count_dependencies;
+ softdep_ast_cleanup = softdep_ast_cleanup_proc;
/* Initialize the callout with an mtx. */
callout_init_mtx(&softdep_callout, &lk, 0);
@@ -2389,12 +2436,14 @@
softdep_uninitialize()
{
+ /* clear bioops hack */
+ bioops.io_start = NULL;
+ bioops.io_complete = NULL;
+ bioops.io_deallocate = NULL;
+ bioops.io_countdeps = NULL;
+ softdep_ast_cleanup = NULL;
+
callout_drain(&softdep_callout);
- hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash);
- hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash);
- hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash);
- hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash);
- free(indir_hashtbl, M_FREEWORK);
}
/*
@@ -2409,11 +2458,14 @@
struct ucred *cred;
{
struct csum_total cstotal;
+ struct mount_softdeps *sdp;
struct ufsmount *ump;
struct cg *cgp;
struct buf *bp;
- int error, cyl;
+ int i, error, cyl;
+ sdp = malloc(sizeof(struct mount_softdeps), M_MOUNTDATA,
+ M_WAITOK | M_ZERO);
MNT_ILOCK(mp);
mp->mnt_flag = (mp->mnt_flag & ~MNT_ASYNC) | MNT_SOFTDEP;
if ((mp->mnt_kern_flag & MNTK_SOFTDEP) == 0) {
@@ -2420,8 +2472,11 @@
mp->mnt_kern_flag = (mp->mnt_kern_flag & ~MNTK_ASYNC) |
MNTK_SOFTDEP | MNTK_NOASYNC;
}
+ ump = VFSTOUFS(mp);
+ ump->um_softdep = sdp;
MNT_IUNLOCK(mp);
- ump = VFSTOUFS(mp);
+ rw_init(LOCK_PTR(ump), "Per-Filesystem Softdep Lock");
+ sdp->sd_ump = ump;
LIST_INIT(&ump->softdep_workitem_pending);
LIST_INIT(&ump->softdep_journal_pending);
TAILQ_INIT(&ump->softdep_unlinked);
@@ -2429,12 +2484,48 @@
ump->softdep_worklist_tail = NULL;
ump->softdep_on_worklist = 0;
ump->softdep_deps = 0;
+ LIST_INIT(&ump->softdep_mkdirlisthd);
+ ump->pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP,
+ &ump->pagedep_hash_size);
+ ump->pagedep_nextclean = 0;
+ ump->inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP,
+ &ump->inodedep_hash_size);
+ ump->inodedep_nextclean = 0;
+ ump->newblk_hashtbl = hashinit(max_softdeps / 2, M_NEWBLK,
+ &ump->newblk_hash_size);
+ ump->bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP,
+ &ump->bmsafemap_hash_size);
+ i = 1 << (ffs(desiredvnodes / 10) - 1);
+ ump->indir_hashtbl = malloc(i * sizeof(struct indir_hashhead),
+ M_FREEWORK, M_WAITOK);
+ ump->indir_hash_size = i - 1;
+ for (i = 0; i <= ump->indir_hash_size; i++)
+ TAILQ_INIT(&ump->indir_hashtbl[i]);
+ ACQUIRE_GBLLOCK(&lk);
+ TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
+ FREE_GBLLOCK(&lk);
if ((fs->fs_flags & FS_SUJ) &&
(error = journal_mount(mp, fs, cred)) != 0) {
printf("Failed to start journal: %d\n", error);
+ softdep_unmount(mp);
return (error);
}
/*
+ * Start our flushing thread in the bufdaemon process.
+ */
+ ACQUIRE_LOCK(ump);
+ ump->softdep_flags |= FLUSH_STARTING;
+ FREE_LOCK(ump);
+ kproc_kthread_add(&softdep_flush, mp, &bufdaemonproc,
+ &ump->softdep_flushtd, 0, 0, "softdepflush", "%s worker",
+ mp->mnt_stat.f_mntonname);
+ ACQUIRE_LOCK(ump);
+ while ((ump->softdep_flags & FLUSH_STARTING) != 0) {
+ msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, "sdstart",
+ hz / 2);
+ }
+ FREE_LOCK(ump);
+ /*
* When doing soft updates, the counters in the
* superblock may have gotten out of sync. Recomputation
* can take a long time and can be deferred for background
@@ -2449,6 +2540,7 @@
if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
fs->fs_cgsize, cred, &bp)) != 0) {
brelse(bp);
+ softdep_unmount(mp);
return (error);
}
cgp = (struct cg *)bp->b_data;
@@ -2471,16 +2563,56 @@
softdep_unmount(mp)
struct mount *mp;
{
+ struct ufsmount *ump;
+#ifdef INVARIANTS
+ int i;
+#endif
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_unmount called on non-softdep filesystem"));
+ ump = VFSTOUFS(mp);
MNT_ILOCK(mp);
mp->mnt_flag &= ~MNT_SOFTDEP;
if (MOUNTEDSUJ(mp) == 0) {
MNT_IUNLOCK(mp);
- return;
+ } else {
+ mp->mnt_flag &= ~MNT_SUJ;
+ MNT_IUNLOCK(mp);
+ journal_unmount(ump);
}
- mp->mnt_flag &= ~MNT_SUJ;
- MNT_IUNLOCK(mp);
- journal_unmount(mp);
+ /*
+ * Shut down our flushing thread. The NULL check is needed because
+ * softdep_mount may error out before the thread has been created.
+ */
+ if (ump->softdep_flushtd != NULL) {
+ ACQUIRE_LOCK(ump);
+ ump->softdep_flags |= FLUSH_EXIT;
+ wakeup(&ump->softdep_flushtd);
+ msleep(&ump->softdep_flags, LOCK_PTR(ump), PVM | PDROP,
+ "sdwait", 0);
+ KASSERT((ump->softdep_flags & FLUSH_EXIT) == 0,
+ ("Thread shutdown failed"));
+ }
+ /*
+ * Free up our resources.
+ */
+ ACQUIRE_GBLLOCK(&lk);
+ TAILQ_REMOVE(&softdepmounts, ump->um_softdep, sd_next);
+ FREE_GBLLOCK(&lk);
+ rw_destroy(LOCK_PTR(ump));
+ hashdestroy(ump->pagedep_hashtbl, M_PAGEDEP, ump->pagedep_hash_size);
+ hashdestroy(ump->inodedep_hashtbl, M_INODEDEP, ump->inodedep_hash_size);
+ hashdestroy(ump->newblk_hashtbl, M_NEWBLK, ump->newblk_hash_size);
+ hashdestroy(ump->bmsafemap_hashtbl, M_BMSAFEMAP,
+ ump->bmsafemap_hash_size);
+ free(ump->indir_hashtbl, M_FREEWORK);
+#ifdef INVARIANTS
+ for (i = 0; i <= D_LAST; i++)
+ KASSERT(ump->softdep_curdeps[i] == 0,
+ ("Unmount %s: Dep type %s != 0 (%ld)", ump->um_fs->fs_fsmnt,
+ TYPENAME(i), ump->softdep_curdeps[i]));
+#endif
+ free(ump->um_softdep, M_MOUNTDATA);
}
static struct jblocks *
@@ -2535,9 +2667,10 @@
int bytes;
{
+ LOCK_OWNED(VFSTOUFS(mp));
jblocks->jb_free += bytes / DEV_BSIZE;
if (jblocks->jb_suspended)
- worklist_speedup();
+ worklist_speedup(mp);
wakeup(jblocks);
}
@@ -2628,6 +2761,7 @@
struct ucred *cred;
{
struct jblocks *jblocks;
+ struct ufsmount *ump;
struct vnode *vp;
struct inode *ip;
ufs2_daddr_t blkno;
@@ -2635,6 +2769,12 @@
int error;
int i;
+ ump = VFSTOUFS(mp);
+ ump->softdep_journal_tail = NULL;
+ ump->softdep_on_journal = 0;
+ ump->softdep_accdeps = 0;
+ ump->softdep_req = 0;
+ ump->softdep_jblocks = NULL;
error = softdep_journal_lookup(mp, &vp);
if (error != 0) {
printf("Failed to find journal. Use tunefs to create one\n");
@@ -2659,7 +2799,7 @@
}
jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */
jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */
- VFSTOUFS(mp)->softdep_jblocks = jblocks;
+ ump->softdep_jblocks = jblocks;
out:
if (error == 0) {
MNT_ILOCK(mp);
@@ -2685,12 +2825,10 @@
}
static void
-journal_unmount(mp)
- struct mount *mp;
+journal_unmount(ump)
+ struct ufsmount *ump;
{
- struct ufsmount *ump;
- ump = VFSTOUFS(mp);
if (ump->softdep_jblocks)
jblocks_destroy(ump->softdep_jblocks);
ump->softdep_jblocks = NULL;
@@ -2707,8 +2845,8 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
+ LOCK_OWNED(ump);
if (wk->wk_state & ONWORKLIST)
panic("add_to_journal: %s(0x%X) already on list",
TYPENAME(wk->wk_type), wk->wk_state);
@@ -2733,8 +2871,8 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
+ LOCK_OWNED(ump);
#ifdef SUJ_DEBUG
{
struct worklist *wkn;
@@ -2771,7 +2909,7 @@
int thresh;
{
struct jblocks *jblocks;
- int avail;
+ int limit, avail;
jblocks = ump->softdep_jblocks;
if (jblocks == NULL)
@@ -2779,8 +2917,12 @@
/*
* We use a tighter restriction here to prevent request_cleanup()
* running in threads from running into locks we currently hold.
+ * We have to be over the limit and our filesystem has to be
+ * responsible for more than our share of that usage.
*/
- if (dep_current[D_INODEDEP] > (max_softdeps / 10) * 9)
+ limit = (max_softdeps / 10) * 9;
+ if (dep_current[D_INODEDEP] > limit &&
+ ump->softdep_curdeps[D_INODEDEP] > limit / stat_flush_threads)
return (0);
if (thresh)
thresh = jblocks->jb_min;
@@ -2805,7 +2947,7 @@
if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
stat_journal_min++;
mp->mnt_kern_flag |= MNTK_SUSPEND;
- mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc);
+ mp->mnt_susp_owner = ump->softdep_flushtd;
}
jblocks->jb_suspended = 1;
MNT_IUNLOCK(mp);
@@ -2823,10 +2965,10 @@
if (jblocks != NULL && jblocks->jb_suspended &&
journal_space(ump, jblocks->jb_min)) {
jblocks->jb_suspended = 0;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
mp->mnt_susp_owner = curthread;
- vfs_write_resume(mp);
- ACQUIRE_LOCK(&lk);
+ vfs_write_resume(mp, 0);
+ ACQUIRE_LOCK(ump);
return (1);
}
return (0);
@@ -2851,21 +2993,26 @@
{
struct ufsmount *ump;
+ KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
+ ("softdep_prealloc called on non-softdep filesystem"));
/*
* Nothing to do if we are not running journaled soft updates.
- * If we currently hold the snapshot lock, we must avoid handling
- * other resources that could cause deadlock.
+ * If we currently hold the snapshot lock, we must avoid
+ * handling other resources that could cause deadlock. Do not
+ * touch the quotas vnode since it is typically recursed with
+ * other vnode locks held.
*/
- if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)))
+ if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)) ||
+ (vp->v_vflag & VV_SYSTEM) != 0)
return (0);
ump = VFSTOUFS(vp->v_mount);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (journal_space(ump, 0)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
stat_journal_low++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (waitok == MNT_NOWAIT)
return (ENOSPC);
/*
@@ -2874,15 +3021,15 @@
*/
if ((curthread->td_pflags & TDP_COWINPROGRESS) == 0)
ffs_syncvnode(vp, waitok, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
if (journal_space(ump, 0) == 0) {
- softdep_speedup();
+ softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -2901,7 +3048,7 @@
struct ufsmount *ump;
ump = VFSTOUFS(dvp->v_mount);
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ump);
/*
* Nothing to do if we have sufficient journal space.
* If we currently hold the snapshot lock, we must avoid
@@ -2910,11 +3057,11 @@
if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
return;
stat_journal_low++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vp)
ffs_syncvnode(vp, MNT_NOWAIT, 0);
ffs_syncvnode(dvp, MNT_WAIT, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/* Process vp before dvp as it may create .. removes. */
if (vp) {
process_removes(vp);
@@ -2922,10 +3069,10 @@
}
process_removes(dvp);
process_truncates(dvp);
- softdep_speedup();
+ softdep_speedup(ump);
process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
if (journal_space(ump, 0) == 0) {
- softdep_speedup();
+ softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
@@ -3101,12 +3248,12 @@
return;
ump = VFSTOUFS(mp);
jblocks = ump->softdep_jblocks;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
while (ump->softdep_on_journal) {
jblocks->jb_needseg = 1;
softdep_process_journal(mp, NULL, MNT_WAIT);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
static void softdep_synchronize_completed(struct bio *);
@@ -3118,6 +3265,7 @@
{
struct jseg *oldest;
struct jseg *jseg;
+ struct ufsmount *ump;
/*
* caller1 marks the last segment written before we issued the
@@ -3124,8 +3272,13 @@
* synchronize cache.
*/
jseg = bp->bio_caller1;
+ if (jseg == NULL) {
+ g_destroy_bio(bp);
+ return;
+ }
+ ump = VFSTOUFS(jseg->js_list.wk_mp);
+ ACQUIRE_LOCK(ump);
oldest = NULL;
- ACQUIRE_LOCK(&lk);
/*
* Mark all the journal entries waiting on the synchronize cache
* as completed so they may continue on.
@@ -3142,7 +3295,7 @@
if (oldest)
complete_jsegs(oldest);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
g_destroy_bio(bp);
}
@@ -3202,6 +3355,7 @@
bio = NULL;
jseg = NULL;
ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
fs = ump->um_fs;
jblocks = ump->softdep_jblocks;
devbsize = ump->um_devvp->v_bufobj.bo_bsize;
@@ -3232,7 +3386,7 @@
cnt++;
/*
* Verify some free journal space. softdep_prealloc() should
- * guarantee that we don't run out so this is indicative of
+ * guarantee that we don't run out so this is indicative of
* a problem with the flow control. Try to recover
* gracefully in any event.
*/
@@ -3240,10 +3394,10 @@
if (flags != MNT_WAIT)
break;
printf("softdep: Out of journal space!\n");
- softdep_speedup();
- msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
+ softdep_speedup(ump);
+ msleep(jblocks, LOCK_PTR(ump), PRIBIO, "jblocks", hz);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
workitem_alloc(&jseg->js_list, D_JSEG, mp);
LIST_INIT(&jseg->js_entries);
@@ -3255,7 +3409,7 @@
bio = g_alloc_bio();
jseg->js_jblocks = jblocks;
bp = geteblk(fs->fs_bsize, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If there was a race while we were allocating the block
* and jseg the entry we care about was likely written.
@@ -3267,9 +3421,9 @@
if (cnt + jblocks->jb_needseg == 0 || jblocks->jb_free == 0) {
bp->b_flags |= B_INVAL | B_NOCACHE;
WORKITEM_FREE(jseg, D_JSEG);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
brelse(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
/*
@@ -3291,7 +3445,6 @@
bp->b_lblkno = bp->b_blkno;
bp->b_offset = bp->b_blkno * DEV_BSIZE;
bp->b_bcount = size;
- bp->b_bufobj = &ump->um_devvp->v_bufobj;
bp->b_flags &= ~B_INVAL;
bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
/*
@@ -3315,6 +3468,24 @@
*/
data = bp->b_data;
off = 0;
+
+ /*
+ * Always put a header on the first block.
+ * XXX As with the case below, there might be no chance to get
+ * into the loop. Ensure that something valid is written.
+ */
+ jseg_write(ump, jseg, data);
+ off += JREC_SIZE;
+ data = bp->b_data + off;
+
+ /*
+ * XXX Something is wrong here. There's no work to do,
+ * but we need to perform an I/O and allow it to complete
+ * anyway.
+ */
+ if (LIST_EMPTY(&ump->softdep_journal_pending))
+ stat_emptyjblocks++;
+
while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
!= NULL) {
if (cnt == 0)
@@ -3364,6 +3535,11 @@
data = bp->b_data + off;
cnt--;
}
+
+ /* Clear any remaining space so we don't leak kernel data */
+ if (size > off)
+ bzero(data, size - off);
+
/*
* Write this one buffer and continue.
*/
@@ -3370,10 +3546,8 @@
segwritten = 1;
jblocks->jb_needseg = 0;
WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
- FREE_LOCK(&lk);
- BO_LOCK(bp->b_bufobj);
- bgetvp(ump->um_devvp, bp);
- BO_UNLOCK(bp->b_bufobj);
+ FREE_LOCK(ump);
+ pbgetvp(ump->um_devvp, bp);
/*
* We only do the blocking wait once we find the journal
* entry we're looking for.
@@ -3382,7 +3556,7 @@
bwrite(bp);
else
bawrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
* If we wrote a segment issue a synchronize cache so the journal
@@ -3403,10 +3577,10 @@
if (flags == 0 && jblocks->jb_suspended) {
if (journal_unsuspend(ump))
return;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
VFS_SYNC(mp, MNT_NOWAIT);
ffs_sbupdate(ump, MNT_WAIT, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
}
@@ -3421,7 +3595,6 @@
{
struct worklist *wk;
struct jmvref *jmvref;
- int waiting;
#ifdef INVARIANTS
int i = 0;
#endif
@@ -3428,8 +3601,7 @@
while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) {
WORKLIST_REMOVE(wk);
- waiting = wk->wk_state & IOWAITING;
- wk->wk_state &= ~(INPROGRESS | IOWAITING);
+ wk->wk_state &= ~INPROGRESS;
wk->wk_state |= COMPLETE;
KASSERT(i++ < jseg->js_cnt,
("handle_written_jseg: overflow %d >= %d",
@@ -3470,8 +3642,6 @@
TYPENAME(wk->wk_type));
/* NOTREACHED */
}
- if (waiting)
- wakeup(wk);
}
/* Release the self reference so the structure may be freed. */
rele_jseg(jseg);
@@ -3528,6 +3698,7 @@
* discarded.
*/
bp->b_flags |= B_INVAL | B_NOCACHE;
+ pbrelvp(bp);
complete_jsegs(jseg);
}
@@ -3932,8 +4103,8 @@
/*
* Allocate a new freework structure that may be a level in an indirect
* when parent is not NULL or a top level block when it is. The top level
- * freework structures are allocated without lk held and before the freeblks
- * is visible outside of softdep_setup_freeblocks().
+ * freework structures are allocated without the per-filesystem lock held
+ * and before the freeblks is visible outside of softdep_setup_freeblocks().
*/
static struct freework *
newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal)
@@ -3964,10 +4135,10 @@
if (journal)
newjfreeblk(freeblks, lbn, nb, frags);
if (parent == NULL) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
freeblks->fb_ref++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
return (freework);
@@ -4001,7 +4172,8 @@
/*
* Allocate a new jfreeblk to journal top level block pointer when truncating
- * a file. The caller must add this to the worklist when lk is held.
+ * a file. The caller must add this to the worklist when the per-filesystem
+ * lock is held.
*/
static struct jfreeblk *
newjfreeblk(freeblks, lbn, blkno, frags)
@@ -4027,6 +4199,33 @@
}
/*
+ * The journal is only prepared to handle full-size block numbers, so we
+ * have to adjust the record to reflect the change to a full-size block.
+ * For example, suppose we have a block made up of fragments 8-15 and
+ * want to free its last two fragments. We are given a request that says:
+ * FREEBLK ino=5, blkno=14, lbn=0, frags=2, oldfrags=0
+ * where frags are the number of fragments to free and oldfrags are the
+ * number of fragments to keep. To block align it, we have to change it to
+ * have a valid full-size blkno, so it becomes:
+ * FREEBLK ino=5, blkno=8, lbn=0, frags=2, oldfrags=6
+ */
+static void
+adjust_newfreework(freeblks, frag_offset)
+ struct freeblks *freeblks;
+ int frag_offset;
+{
+ struct jfreeblk *jfreeblk;
+
+ KASSERT((LIST_FIRST(&freeblks->fb_jblkdephd) != NULL &&
+ LIST_FIRST(&freeblks->fb_jblkdephd)->jb_list.wk_type == D_JFREEBLK),
+ ("adjust_newfreework: Missing freeblks dependency"));
+
+ jfreeblk = WK_JFREEBLK(LIST_FIRST(&freeblks->fb_jblkdephd));
+ jfreeblk->jf_blkno -= frag_offset;
+ jfreeblk->jf_frags += frag_offset;
+}
+
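Plugging the comment's example into this routine: the caller passes frag_offset = 6 (the six retained fragments 8-13), so the code rewinds jf_blkno from 14 down to the block-aligned 8 (jf_blkno -= 6) and widens jf_frags by those same 6 fragments (jf_frags += 6), keeping the journal record aligned to a full-size block boundary.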
+/*
* Allocate a new jtrunc to track a partial truncation.
*/
static struct jtrunc *
@@ -4389,6 +4588,7 @@
int waitfor;
{
+ LOCK_OWNED(VFSTOUFS(wk->wk_mp));
/*
* Blocking journal waits cause slow synchronous behavior. Record
* stats on the frequency of these blocking operations.
@@ -4442,14 +4642,10 @@
struct inode *ip;
{
struct inodedep *inodedep;
- int dflags;
KASSERT(ip->i_nlink >= ip->i_effnlink,
("inodedep_lookup_ip: bad delta"));
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags,
+ (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC,
&inodedep);
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked"));
@@ -4472,10 +4668,12 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_create called on non-softdep filesystem"));
KASSERT(ip->i_nlink == 1,
("softdep_setup_create: Invalid link count."));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4484,7 +4682,7 @@
("softdep_setup_create: No addref structure present."));
}
softdep_prelink(dvp, NULL);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4501,10 +4699,10 @@
struct inodedep *inodedep;
struct jaddref *jaddref;
struct vnode *dvp;
- struct vnode *vp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_dotdot_link called on non-softdep filesystem"));
dvp = ITOV(dp);
- vp = ITOV(ip);
jaddref = NULL;
/*
* We don't set MKDIR_PARENT as this is not tied to a mkdir and
@@ -4513,13 +4711,13 @@
if (DOINGSUJ(dvp))
jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET,
dp->i_effnlink - 1, dp->i_mode);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(dp);
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4537,18 +4735,20 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_link called on non-softdep filesystem"));
dvp = ITOV(dp);
jaddref = NULL;
if (DOINGSUJ(dvp))
jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1,
ip->i_mode);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4568,6 +4768,8 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_mkdir called on non-softdep filesystem"));
dvp = ITOV(dp);
dotaddref = dotdotaddref = NULL;
if (DOINGSUJ(dvp)) {
@@ -4578,7 +4780,7 @@
dp->i_effnlink - 1, dp->i_mode);
dotdotaddref->ja_state |= MKDIR_PARENT;
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4586,8 +4788,8 @@
KASSERT(jaddref != NULL,
("softdep_setup_mkdir: No addref structure present."));
KASSERT(jaddref->ja_parent == dp->i_number,
- ("softdep_setup_mkdir: bad parent %d",
- jaddref->ja_parent));
+ ("softdep_setup_mkdir: bad parent %ju",
+ (uintmax_t)jaddref->ja_parent));
TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref,
if_deps);
}
@@ -4596,7 +4798,7 @@
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst,
&dotdotaddref->ja_ref, if_deps);
softdep_prelink(ITOV(dp), NULL);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4610,12 +4812,14 @@
{
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_rmdir called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4629,12 +4833,14 @@
{
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_setup_unlink called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
softdep_prelink(dvp, ITOV(ip));
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4650,8 +4856,10 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_create called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4660,36 +4868,10 @@
("softdep_revert_create: addref parent mismatch"));
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
- * Called to release the journal structures created by a failed dotdot link
- * creation. Adjusts nlinkdelta for non-journaling softdep.
- */
-void
-softdep_revert_dotdot_link(dp, ip)
- struct inode *dp;
- struct inode *ip;
-{
- struct inodedep *inodedep;
- struct jaddref *jaddref;
- struct vnode *dvp;
-
- dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
- inodedep = inodedep_lookup_ip(dp);
- if (DOINGSUJ(dvp)) {
- jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
- inoreflst);
- KASSERT(jaddref->ja_parent == ip->i_number,
- ("softdep_revert_dotdot_link: addref parent mismatch"));
- cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
- }
- FREE_LOCK(&lk);
-}
-
-/*
* Called to release the journal structures created by a failed link
* addition. Adjusts nlinkdelta for non-journaling softdep.
*/
@@ -4702,8 +4884,10 @@
struct jaddref *jaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_link called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(ip);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4712,7 +4896,7 @@
("softdep_revert_link: addref parent mismatch"));
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4729,9 +4913,11 @@
struct jaddref *dotaddref;
struct vnode *dvp;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_mkdir called on non-softdep filesystem"));
dvp = ITOV(dp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
inodedep = inodedep_lookup_ip(dp);
if (DOINGSUJ(dvp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
@@ -4753,7 +4939,7 @@
("softdep_revert_mkdir: dot addref parent mismatch"));
cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4765,10 +4951,12 @@
struct inode *ip;
{
- ACQUIRE_LOCK(&lk);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(dp->i_ump)) != 0,
+ ("softdep_revert_rmdir called on non-softdep filesystem"));
+ ACQUIRE_LOCK(dp->i_ump);
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -4820,6 +5008,8 @@
struct fs *fs;
mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_inomapdep called on non-softdep filesystem"));
fs = ip->i_ump->um_fs;
jaddref = NULL;
@@ -4852,8 +5042,8 @@
bmsafemap = malloc(sizeof(struct bmsafemap),
M_BMSAFEMAP, M_SOFTDEP_FLAGS);
workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
- ACQUIRE_LOCK(&lk);
- if ((inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep)))
+ ACQUIRE_LOCK(ip->i_ump);
+ if ((inodedep_lookup(mp, newinum, DEPALLOC, &inodedep)))
panic("softdep_setup_inomapdep: dependency %p for new"
"inode already exists", inodedep);
bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum), bmsafemap);
@@ -4867,7 +5057,7 @@
}
inodedep->id_bmsafemap = bmsafemap;
inodedep->id_state &= ~DEPCOMPLETE;
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -4885,9 +5075,13 @@
struct newblk *newblk;
struct bmsafemap *bmsafemap;
struct jnewblk *jnewblk;
+ struct ufsmount *ump;
struct fs *fs;
- fs = VFSTOUFS(mp)->um_fs;
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_blkmapdep called on non-softdep filesystem"));
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
jnewblk = NULL;
/*
* Create a dependency for the newly allocated block.
@@ -4930,7 +5124,7 @@
CTR3(KTR_SUJ,
"softdep_setup_blkmapdep: blkno %jd frags %d oldfrags %d",
newblkno, frags, oldfrags);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0)
panic("softdep_setup_blkmapdep: found block");
newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp,
@@ -4944,16 +5138,15 @@
}
newblk->nb_bmsafemap = bmsafemap;
newblk->nb_jnewblk = jnewblk;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
-#define BMSAFEMAP_HASH(fs, cg) \
- (&bmsafemap_hashtbl[((((register_t)(fs)) >> 13) + (cg)) & bmsafemap_hash])
+#define BMSAFEMAP_HASH(ump, cg) \
+ (&(ump)->bmsafemap_hashtbl[(cg) & (ump)->bmsafemap_hash_size])
static int
-bmsafemap_find(bmsafemaphd, mp, cg, bmsafemapp)
+bmsafemap_find(bmsafemaphd, cg, bmsafemapp)
struct bmsafemap_hashhead *bmsafemaphd;
- struct mount *mp;
int cg;
struct bmsafemap **bmsafemapp;
{
@@ -4960,7 +5153,7 @@
struct bmsafemap *bmsafemap;
LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash)
- if (bmsafemap->sm_list.wk_mp == mp && bmsafemap->sm_cg == cg)
+ if (bmsafemap->sm_cg == cg)
break;
if (bmsafemap) {
*bmsafemapp = bmsafemap;
@@ -4989,19 +5182,20 @@
struct bmsafemap_hashhead *bmsafemaphd;
struct bmsafemap *bmsafemap, *collision;
struct worklist *wk;
- struct fs *fs;
+ struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
- if (bp)
- LIST_FOREACH(wk, &bp->b_dep, wk_list)
- if (wk->wk_type == D_BMSAFEMAP) {
- if (newbmsafemap)
- WORKITEM_FREE(newbmsafemap,D_BMSAFEMAP);
- return (WK_BMSAFEMAP(wk));
- }
- fs = VFSTOUFS(mp)->um_fs;
- bmsafemaphd = BMSAFEMAP_HASH(fs, cg);
- if (bmsafemap_find(bmsafemaphd, mp, cg, &bmsafemap) == 1) {
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ KASSERT(bp != NULL, ("bmsafemap_lookup: missing buffer"));
+ LIST_FOREACH(wk, &bp->b_dep, wk_list) {
+ if (wk->wk_type == D_BMSAFEMAP) {
+ if (newbmsafemap)
+ WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP);
+ return (WK_BMSAFEMAP(wk));
+ }
+ }
+ bmsafemaphd = BMSAFEMAP_HASH(ump, cg);
+ if (bmsafemap_find(bmsafemaphd, cg, &bmsafemap) == 1) {
if (newbmsafemap)
WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP);
return (bmsafemap);
@@ -5009,11 +5203,11 @@
if (newbmsafemap) {
bmsafemap = newbmsafemap;
} else {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bmsafemap = malloc(sizeof(struct bmsafemap),
M_BMSAFEMAP, M_SOFTDEP_FLAGS);
workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
bmsafemap->sm_buf = bp;
LIST_INIT(&bmsafemap->sm_inodedephd);
@@ -5024,13 +5218,13 @@
LIST_INIT(&bmsafemap->sm_jnewblkhd);
LIST_INIT(&bmsafemap->sm_freehd);
LIST_INIT(&bmsafemap->sm_freewr);
- if (bmsafemap_find(bmsafemaphd, mp, cg, &collision) == 1) {
+ if (bmsafemap_find(bmsafemaphd, cg, &collision) == 1) {
WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
return (collision);
}
bmsafemap->sm_cg = cg;
LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash);
- LIST_INSERT_HEAD(&VFSTOUFS(mp)->softdep_dirtycg, bmsafemap, sm_next);
+ LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next);
WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
return (bmsafemap);
}
@@ -5086,6 +5280,8 @@
lbn = bp->b_lblkno;
mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_allocdirect called on non-softdep filesystem"));
if (oldblkno && oldblkno != newblkno)
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
else
@@ -5095,7 +5291,7 @@
"softdep_setup_allocdirect: ino %d blkno %jd oldblkno %jd "
"off %jd newsize %ld oldsize %d",
ip->i_number, newblkno, oldblkno, off, newsize, oldsize);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (off >= NDADDR) {
if (lbn > 0)
panic("softdep_setup_allocdirect: bad lbn %jd, off %jd",
@@ -5125,7 +5321,7 @@
/*
* Convert the newblk to an allocdirect.
*/
- newblk->nb_list.wk_type = D_ALLOCDIRECT;
+ WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT);
adp = (struct allocdirect *)newblk;
newblk->nb_freefrag = freefrag;
adp->ad_offset = off;
@@ -5144,7 +5340,7 @@
if (freefrag && freefrag->ff_jdep != NULL &&
freefrag->ff_jdep->wk_type == D_JFREEFRAG)
add_to_journal(freefrag->ff_jdep);
- inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
adp->ad_inodedep = inodedep;
WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list);
@@ -5167,7 +5363,7 @@
TAILQ_INSERT_TAIL(adphead, adp, ad_next);
if (oldadp != NULL && oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return;
}
TAILQ_FOREACH(oldadp, adphead, ad_next) {
@@ -5181,7 +5377,7 @@
if (oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -5261,7 +5457,7 @@
struct freefrag *freefrag;
freefrag = NULL;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(newadp->ad_list.wk_mp));
if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
newadp->ad_oldsize != oldadp->ad_newsize ||
newadp->ad_offset >= NDADDR)
@@ -5419,7 +5615,7 @@
* safe to modify the list head here.
*/
LIST_INIT(&wkhd);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list);
/*
* If the journal has not been written we must cancel it here.
@@ -5430,12 +5626,12 @@
freefrag->ff_jdep->wk_type);
cancel_jnewblk(WK_JNEWBLK(freefrag->ff_jdep), &wkhd);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno,
freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKITEM_FREE(freefrag, D_FREEFRAG);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -5462,18 +5658,19 @@
struct mount *mp;
ufs_lbn_t lbn;
- if (off >= NXADDR)
- panic("softdep_setup_allocext: lbn %lld > NXADDR",
- (long long)off);
+ mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_allocext called on non-softdep filesystem"));
+ KASSERT(off < NXADDR, ("softdep_setup_allocext: lbn %lld > NXADDR",
+ (long long)off));
lbn = bp->b_lblkno;
- mp = UFSTOVFS(ip->i_ump);
if (oldblkno && oldblkno != newblkno)
freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
else
freefrag = NULL;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (newblk_lookup(mp, newblkno, 0, &newblk) == 0)
panic("softdep_setup_allocext: lost block");
KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
@@ -5481,7 +5678,7 @@
/*
* Convert the newblk to an allocdirect.
*/
- newblk->nb_list.wk_type = D_ALLOCDIRECT;
+ WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT);
adp = (struct allocdirect *)newblk;
newblk->nb_freefrag = freefrag;
adp->ad_offset = off;
@@ -5501,7 +5698,7 @@
if (freefrag && freefrag->ff_jdep != NULL &&
freefrag->ff_jdep->wk_type == D_JFREEFRAG)
add_to_journal(freefrag->ff_jdep);
- inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
adp->ad_inodedep = inodedep;
WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list);
@@ -5524,7 +5721,7 @@
TAILQ_INSERT_TAIL(adphead, adp, ad_next);
if (oldadp != NULL && oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return;
}
TAILQ_FOREACH(oldadp, adphead, ad_next) {
@@ -5537,7 +5734,7 @@
TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
if (oldadp->ad_offset == off)
allocdirect_merge(adphead, adp, oldadp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -5585,12 +5782,12 @@
freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize, lbn);
else
freefrag = NULL;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (newblk_lookup(UFSTOVFS(ip->i_ump), newblkno, 0, &newblk) == 0)
panic("new_allocindir: lost block");
KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
("newallocindir: newblk already initialized"));
- newblk->nb_list.wk_type = D_ALLOCINDIR;
+ WORKITEM_REASSIGN(newblk, D_ALLOCINDIR);
newblk->nb_freefrag = freefrag;
aip = (struct allocindir *)newblk;
aip->ai_offset = ptrno;
@@ -5626,21 +5823,19 @@
struct allocindir *aip;
struct pagedep *pagedep;
struct mount *mp;
- int dflags;
- if (lbn != nbp->b_lblkno)
- panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
- lbn, bp->b_lblkno);
+ mp = UFSTOVFS(ip->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_allocindir_page called on non-softdep filesystem"));
+ KASSERT(lbn == nbp->b_lblkno,
+ ("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
+ lbn, bp->b_lblkno));
CTR4(KTR_SUJ,
"softdep_setup_allocindir_page: ino %d blkno %jd oldblkno %jd "
"lbn %jd", ip->i_number, newblkno, oldblkno, lbn);
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page");
- mp = UFSTOVFS(ip->i_ump);
aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
/*
* If we are allocating a directory page, then we must
* allocate an associated pagedep to track additions and
@@ -5650,7 +5845,7 @@
pagedep_lookup(mp, nbp, ip->i_number, lbn, DEPALLOC, &pagedep);
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
freefrag = setup_allocindir_phase2(bp, ip, inodedep, aip, lbn);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
if (freefrag)
handle_workitem_freefrag(freefrag);
}
@@ -5670,8 +5865,9 @@
struct inodedep *inodedep;
struct allocindir *aip;
ufs_lbn_t lbn;
- int dflags;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_setup_allocindir_meta called on non-softdep filesystem"));
CTR3(KTR_SUJ,
"softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d",
ip->i_number, newblkno, ptrno);
@@ -5678,14 +5874,12 @@
lbn = nbp->b_lblkno;
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
aip = newallocindir(ip, ptrno, newblkno, 0, lbn);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
+ inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC,
+ &inodedep);
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn))
panic("softdep_setup_allocindir_meta: Block already existed");
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
static void
@@ -5718,11 +5912,13 @@
{
struct indirdep *indirdep, *newindirdep;
struct newblk *newblk;
+ struct ufsmount *ump;
struct worklist *wk;
struct fs *fs;
ufs2_daddr_t blkno;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
indirdep = NULL;
newindirdep = NULL;
fs = ip->i_fs;
@@ -5742,7 +5938,7 @@
if (indirdep == NULL && newindirdep != NULL)
break;
/* None found and no new structure available. */
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
newindirdep = malloc(sizeof(struct indirdep),
M_INDIRDEP, M_SOFTDEP_FLAGS);
workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp);
@@ -5766,7 +5962,7 @@
newindirdep->ir_bp = bp;
BUF_KERNPROC(newindirdep->ir_savebp);
bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
indirdep = newindirdep;
WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
@@ -5801,7 +5997,7 @@
struct freefrag *freefrag;
struct mount *mp;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ip->i_ump);
mp = UFSTOVFS(ip->i_ump);
fs = ip->i_fs;
if (bp->b_lblkno >= 0)
@@ -6056,7 +6252,7 @@
* allocations from proceeding until we are finished with the
* truncate and the block is written.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
indirdep = indirdep_lookup(mp, ip, bp);
if (indirdep->ir_freeblks)
panic("setup_trunc_indir: indirdep already truncated.");
@@ -6073,7 +6269,7 @@
trunc_indirdep(indirn, freeblks, bp, off);
} else
trunc_indirdep(indirdep, freeblks, bp, off);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
/*
* Creation is protected by the buf lock. The saveddata is only
* needed if a full truncation follows a partial truncation but it
@@ -6116,10 +6312,13 @@
{
struct freework *fwn;
struct indirdep *indirdep;
+ struct ufsmount *ump;
struct buf *bp;
uintptr_t start;
int count;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
+ LOCK_OWNED(ump);
indirdep = freework->fw_indir;
for (;;) {
bp = indirdep->ir_bp;
@@ -6129,12 +6328,11 @@
/* Inline part of getdirtybuf(). We don't want bremfree. */
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0)
break;
- if (BUF_LOCK(bp,
- LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, &lk) == 0)
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+ LOCK_PTR(ump)) == 0)
BUF_UNLOCK(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
- mtx_assert(&lk, MA_OWNED);
freework->fw_state |= DEPCOMPLETE;
TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next);
/*
@@ -6195,7 +6393,7 @@
* Calculate the number of blocks we are going to release where datablocks
* is the current total and length is the new file size.
*/
-ufs2_daddr_t
+static ufs2_daddr_t
blkcount(fs, datablocks, length)
struct fs *fs;
ufs2_daddr_t datablocks;
@@ -6283,6 +6481,7 @@
struct inodedep *inodedep;
struct jblkdep *jblkdep;
struct allocdirect *adp, *adpn;
+ struct ufsmount *ump;
struct fs *fs;
struct buf *bp;
struct vnode *vp;
@@ -6289,10 +6488,13 @@
struct mount *mp;
ufs2_daddr_t extblocks, datablocks;
ufs_lbn_t tmpval, lbn, lastlbn;
- int frags, lastoff, iboff, allocblock, needj, dflags, error, i;
+ int frags, lastoff, iboff, allocblock, needj, error, i;
fs = ip->i_fs;
- mp = UFSTOVFS(ip->i_ump);
+ ump = ip->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_journal_freeblocks called on non-softdep filesystem"));
vp = ITOV(ip);
needj = 1;
iboff = -1;
@@ -6301,22 +6503,19 @@
datablocks = 0;
frags = 0;
freeblks = newfreeblks(mp, ip);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If we're truncating a removed file that will never be written
* we don't need to journal the block frees. The canceled journals
* for the allocations will suffice.
*/
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED &&
length == 0)
needj = 0;
CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d",
ip->i_number, length, needj);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* Calculate the lbn that we are truncating to. This results in -1
* if we're truncating to 0 bytes. So it is the last lbn we want
@@ -6375,8 +6574,11 @@
oldfrags -= frags;
oldfrags = numfrags(ip->i_fs, oldfrags);
blkno += numfrags(ip->i_fs, frags);
- newfreework(ip->i_ump, freeblks, NULL, lastlbn,
+ newfreework(ump, freeblks, NULL, lastlbn,
blkno, oldfrags, 0, needj);
+ if (needj)
+ adjust_newfreework(freeblks,
+ numfrags(ip->i_fs, frags));
} else if (blkno == 0)
allocblock = 1;
}
@@ -6408,9 +6610,9 @@
(void) chkdq(ip, -datablocks, NOCRED, 0);
#endif
freeblks->fb_chkcnt = -datablocks;
- UFS_LOCK(ip->i_ump);
+ UFS_LOCK(ump);
fs->fs_pendingblocks += datablocks;
- UFS_UNLOCK(ip->i_ump);
+ UFS_UNLOCK(ump);
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks);
/*
* Handle truncation of incomplete alloc direct dependencies. We
@@ -6430,14 +6632,14 @@
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
softdep_update_inodeblock(ip, bp, 0);
- if (ip->i_ump->um_fstype == UFS1)
+ if (ump->um_fstype == UFS1)
*((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
else
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
- ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ ACQUIRE_LOCK(ump);
+ (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
@@ -6465,7 +6667,7 @@
}
}
if ((flags & IO_EXT) != 0)
- while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
+ while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
cancel_allocdirect(&inodedep->id_extupdt, adp,
freeblks);
/*
@@ -6488,7 +6690,7 @@
*/
LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps)
add_to_journal(&jblkdep->jb_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bdwrite(bp);
/*
* Truncate dependency structures beyond length.
@@ -6529,8 +6731,8 @@
bawrite(bp);
}
- ACQUIRE_LOCK(&lk);
- inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ ACQUIRE_LOCK(ump);
+ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next);
freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST;
/*
@@ -6545,7 +6747,7 @@
freeblks->fb_state |= INPROGRESS;
else
freeblks = NULL;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (freeblks)
handle_workitem_freeblocks(freeblks, 0);
trunc_pages(ip, length, extblocks, flags);
@@ -6561,6 +6763,8 @@
{
struct jfsync *jfsync;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_journal_fsync called on non-softdep filesystem"));
if ((ip->i_flag & IN_TRUNCATED) == 0)
return;
ip->i_flag &= ~IN_TRUNCATED;
@@ -6568,10 +6772,10 @@
workitem_alloc(&jfsync->jfs_list, D_JFSYNC, UFSTOVFS(ip->i_ump));
jfsync->jfs_size = ip->i_size;
jfsync->jfs_ino = ip->i_number;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
add_to_journal(&jfsync->jfs_list);
jwait(&jfsync->jfs_list, MNT_WAIT);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -6614,20 +6818,23 @@
struct freeblks *freeblks;
struct inodedep *inodedep;
struct allocdirect *adp;
+ struct ufsmount *ump;
struct buf *bp;
struct fs *fs;
ufs2_daddr_t extblocks, datablocks;
struct mount *mp;
- int i, delay, error, dflags;
+ int i, delay, error;
ufs_lbn_t tmpval;
ufs_lbn_t lbn;
+ ump = ip->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_freeblocks called on non-softdep filesystem"));
CTR2(KTR_SUJ, "softdep_setup_freeblks: ip %d length %ld",
ip->i_number, length);
+ KASSERT(length == 0, ("softdep_setup_freeblocks: non-zero length"));
fs = ip->i_fs;
- mp = UFSTOVFS(ip->i_ump);
- if (length != 0)
- panic("softdep_setup_freeblocks: non-zero length");
freeblks = newfreeblks(mp, ip);
extblocks = 0;
datablocks = 0;
@@ -6655,9 +6862,9 @@
(void) chkdq(ip, -datablocks, NOCRED, 0);
#endif
freeblks->fb_chkcnt = -datablocks;
- UFS_LOCK(ip->i_ump);
+ UFS_LOCK(ump);
fs->fs_pendingblocks += datablocks;
- UFS_UNLOCK(ip->i_ump);
+ UFS_UNLOCK(ump);
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks);
/*
* Push the zero'ed inode to its disk buffer so that we are free
@@ -6670,7 +6877,7 @@
brelse(bp);
softdep_error("softdep_setup_freeblocks", error);
}
- if (ip->i_ump->um_fstype == UFS1) {
+ if (ump->um_fstype == UFS1) {
dp1 = ((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number));
ip->i_din1->di_freelink = dp1->di_freelink;
@@ -6684,11 +6891,8 @@
/*
* Find and eliminate any inode dependencies.
*/
- ACQUIRE_LOCK(&lk);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
+ ACQUIRE_LOCK(ump);
+ (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
@@ -6715,7 +6919,7 @@
if (flags & IO_NORMAL) {
merge_inode_lists(&inodedep->id_newinoupdt,
&inodedep->id_inoupdt);
- while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
+ while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
cancel_allocdirect(&inodedep->id_inoupdt, adp,
freeblks);
}
@@ -6722,14 +6926,14 @@
if (flags & IO_EXT) {
merge_inode_lists(&inodedep->id_newextupdt,
&inodedep->id_extupdt);
- while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
+ while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
cancel_allocdirect(&inodedep->id_extupdt, adp,
freeblks);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bdwrite(bp);
trunc_dependencies(ip, freeblks, -1, 0, flags);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
freeblks->fb_state |= DEPCOMPLETE;
@@ -6741,7 +6945,7 @@
freeblks->fb_state |= INPROGRESS;
else
freeblks = NULL;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (freeblks)
handle_workitem_freeblocks(freeblks, 0);
trunc_pages(ip, length, extblocks, flags);
@@ -6854,7 +7058,6 @@
struct bufobj *bo;
struct vnode *vp;
struct buf *bp;
- struct fs *fs;
int blkoff;
/*
@@ -6863,7 +7066,6 @@
* Once they are all there, walk the list and get rid of
* any dependencies.
*/
- fs = ip->i_fs;
vp = ITOV(ip);
bo = &vp->v_bufobj;
BO_LOCK(bo);
@@ -6878,7 +7080,8 @@
bp->b_vflags |= BV_SCANNED;
continue;
}
- if ((bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT)) == NULL)
+ KASSERT(bp->b_bufobj == bo, ("Wrong object in buffer"));
+ if ((bp = getdirtybuf(bp, BO_LOCKPTR(bo), MNT_WAIT)) == NULL)
goto restart;
BO_UNLOCK(bo);
if (deallocate_dependencies(bp, freeblks, blkoff))
@@ -6903,14 +7106,12 @@
}
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
- BO_MTX(bo)) == ENOLCK) {
+ BO_LOCKPTR(bo)) == ENOLCK) {
BO_LOCK(bo);
goto cleanrestart;
}
bp->b_vflags |= BV_SCANNED;
- BO_LOCK(bo);
bremfree(bp);
- BO_UNLOCK(bo);
if (blkoff != 0) {
allocbuf(bp, blkoff);
bqrelse(bp);
@@ -7015,10 +7216,13 @@
{
struct indirdep *indirdep;
struct pagedep *pagedep;
- struct allocdirect *adp;
struct worklist *wk, *wkn;
+ struct ufsmount *ump;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ goto done;
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
LIST_FOREACH_SAFE(wk, &bp->b_dep, wk_list, wkn) {
switch (wk->wk_type) {
case D_INDIRDEP:
@@ -7032,7 +7236,7 @@
case D_PAGEDEP:
pagedep = WK_PAGEDEP(wk);
if (cancel_pagedep(pagedep, freeblks, off)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (ERESTART);
}
continue;
@@ -7058,7 +7262,6 @@
break;
case D_ALLOCDIRECT:
- adp = WK_ALLOCDIRECT(wk);
if (off != 0)
continue;
/* FALLTHROUGH */
@@ -7068,7 +7271,8 @@
/* NOTREACHED */
}
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
+done:
/*
* Don't throw away this buf, we were partially truncating and
* some deps may always remain.
@@ -7223,8 +7427,10 @@
struct worklist *wk;
KASSERT(newblk->nb_jnewblk == NULL,
- ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk));
- mtx_assert(&lk, MA_OWNED);
+ ("free_newblk: jnewblk %p still attached", newblk->nb_jnewblk));
+ KASSERT(newblk->nb_list.wk_type != D_NEWBLK,
+ ("free_newblk: unclaimed newblk"));
+ LOCK_OWNED(VFSTOUFS(newblk->nb_list.wk_mp));
newblk_freefrag(newblk);
if (newblk->nb_state & ONDEPLIST)
LIST_REMOVE(newblk, nb_deps);
@@ -7238,7 +7444,6 @@
while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL)
indirdep_complete(indirdep);
handle_jwork(&newblk->nb_jwork);
- newblk->nb_list.wk_type = D_NEWBLK;
WORKITEM_FREE(newblk, D_NEWBLK);
}
@@ -7254,7 +7459,7 @@
struct diradd *dap;
struct worklist *wk;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(newdirblk->db_list.wk_mp));
WORKLIST_REMOVE(&newdirblk->db_list);
/*
* If the pagedep is still linked onto the directory buffer
@@ -7298,7 +7503,11 @@
struct inodedep *inodedep;
struct freefile *freefile;
struct freeblks *freeblks;
+ struct ufsmount *ump;
+ ump = ip->i_ump;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
+ ("softdep_freefile called on non-softdep filesystem"));
/*
* This sets up the inode de-allocation dependency.
*/
@@ -7309,9 +7518,9 @@
freefile->fx_oldinum = ino;
freefile->fx_devvp = ip->i_devvp;
LIST_INIT(&freefile->fx_jwork);
- UFS_LOCK(ip->i_ump);
+ UFS_LOCK(ump);
ip->i_fs->fs_pendinginodes += 1;
- UFS_UNLOCK(ip->i_ump);
+ UFS_UNLOCK(ump);
/*
* If the inodedep does not exist, then the zero'ed inode has
@@ -7324,7 +7533,7 @@
* Any blocks waiting on the inode to write can be safely freed
* here as it will never be written.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
if (inodedep) {
/*
@@ -7348,12 +7557,15 @@
*/
handle_bufwait(inodedep, &freefile->fx_jwork);
clear_unlinked_inodedep(inodedep);
- /* Re-acquire inodedep as we've dropped lk. */
+ /*
+ * Re-acquire inodedep as we've dropped the
+ * per-filesystem lock in clear_unlinked_inodedep().
+ */
inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
}
}
if (inodedep == NULL || check_inode_unwritten(inodedep)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
handle_workitem_freefile(freefile);
return;
}
@@ -7360,7 +7572,7 @@
if ((inodedep->id_state & DEPCOMPLETE) == 0)
inodedep->id_state |= GOINGAWAY;
WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (ip->i_number == ino)
ip->i_flag |= IN_MODIFIED;
}
@@ -7385,7 +7597,7 @@
struct inodedep *inodedep;
{
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 ||
!LIST_EMPTY(&inodedep->id_dirremhd) ||
@@ -7425,17 +7637,13 @@
return (1);
}
-/*
- * Try to free an inodedep structure. Return 1 if it could be freed.
- */
static int
-free_inodedep(inodedep)
+check_inodedep_free(inodedep)
struct inodedep *inodedep;
{
- mtx_assert(&lk, MA_OWNED);
- if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
- (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
+ LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
+ if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
!LIST_EMPTY(&inodedep->id_dirremhd) ||
!LIST_EMPTY(&inodedep->id_pendinghd) ||
!LIST_EMPTY(&inodedep->id_bufwait) ||
@@ -7450,6 +7658,21 @@
inodedep->id_nlinkdelta != 0 ||
inodedep->id_savedino1 != NULL)
return (0);
+ return (1);
+}
+
+/*
+ * Try to free an inodedep structure. Return 1 if it could be freed.
+ */
+static int
+free_inodedep(inodedep)
+ struct inodedep *inodedep;
+{
+
+ LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
+ if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
+ !check_inodedep_free(inodedep))
+ return (0);
if (inodedep->id_state & ONDEPLIST)
LIST_REMOVE(inodedep, id_deps);
LIST_REMOVE(inodedep, id_hash);
@@ -7476,7 +7699,8 @@
int bsize;
int needj;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
+ LOCK_OWNED(ump);
/*
* Handle partial truncate separately.
*/
@@ -7485,7 +7709,6 @@
return;
}
freeblks = freework->fw_freeblks;
- ump = VFSTOUFS(freeblks->fb_list.wk_mp);
fs = ump->um_fs;
needj = MOUNTEDSUJ(freeblks->fb_list.wk_mp) != 0;
bsize = lfragtosize(fs, freework->fw_frags);
@@ -7517,7 +7740,7 @@
freeblks->fb_cgwait++;
WORKLIST_INSERT(&wkhd, &freework->fw_list);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
freeblks_free(ump, freeblks, btodb(bsize));
CTR4(KTR_SUJ,
"freework_freeblock: ino %d blkno %jd lbn %jd size %ld",
@@ -7524,7 +7747,7 @@
freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize);
ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize,
freeblks->fb_inum, freeblks->fb_vtype, &wkhd);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* The jnewblk will be discarded and the bits in the map never
* made it to disk. We can immediately free the freeblk.
@@ -7580,10 +7803,10 @@
return;
}
freework->fw_state |= INPROGRESS;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
indir_trunc(freework, fsbtodb(fs, freework->fw_blkno),
freework->fw_lbn);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
@@ -7639,7 +7862,7 @@
KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd),
("handle_workitem_freeblocks: Journal entries not written."));
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) {
WORKLIST_REMOVE(wk);
switch (wk->wk_type) {
@@ -7656,11 +7879,11 @@
aip = WK_ALLOCINDIR(wk);
freework = NULL;
if (aip->ai_state & DELAYEDFREE) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
freework = newfreework(ump, freeblks, NULL,
aip->ai_lbn, aip->ai_newblkno,
ump->um_fs->fs_frag, 0, 0);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
newblk = WK_NEWBLK(wk);
if (newblk->nb_jnewblk) {
@@ -7688,7 +7911,7 @@
wake_worklist(&freeblks->fb_list);
freeblks = NULL;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (freeblks)
return handle_complete_freeblocks(freeblks, flags);
return (0);
@@ -7775,7 +7998,7 @@
quotaadj(freeblks->fb_quota, ump, -spare);
quotarele(freeblks->fb_quota);
#endif
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (freeblks->fb_state & ONDEPLIST) {
inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum,
0, &inodedep);
@@ -7790,7 +8013,7 @@
*/
handle_jwork(&freeblks->fb_jwork);
WORKITEM_FREE(freeblks, D_FREEBLKS);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -7818,8 +8041,8 @@
struct fs *fs;
struct indirdep *indirdep;
struct ufsmount *ump;
- ufs1_daddr_t *bap1 = 0;
- ufs2_daddr_t nb, nnb, *bap2 = 0;
+ ufs1_daddr_t *bap1;
+ ufs2_daddr_t nb, nnb, *bap2;
ufs_lbn_t lbnadd, nlbn;
int i, nblocks, ufs1fmt;
int freedblocks;
@@ -7869,7 +8092,7 @@
brelse(bp);
return;
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/* Protects against a race with complete_trunc_indir(). */
freework->fw_state &= ~INPROGRESS;
/*
@@ -7886,7 +8109,7 @@
if (freework->fw_indir == NULL)
TAILQ_INSERT_TAIL(&indirdep->ir_trunc,
freework, fw_next);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
/*
@@ -7893,21 +8116,21 @@
* If we're goingaway, free the indirdep. Otherwise it will
* linger until the write completes.
*/
- if (goingaway) {
+ if (goingaway)
free_indirdep(indirdep);
- ump->um_numindirdeps -= 1;
- }
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/* Initialize pointers depending on block size. */
if (ump->um_fstype == UFS1) {
bap1 = (ufs1_daddr_t *)bp->b_data;
nb = bap1[freework->fw_off];
ufs1fmt = 1;
+ bap2 = NULL;
} else {
bap2 = (ufs2_daddr_t *)bp->b_data;
nb = bap2[freework->fw_off];
ufs1fmt = 0;
+ bap1 = NULL;
}
level = lbn_level(lbn);
needj = MOUNTEDSUJ(UFSTOVFS(ump)) != 0;
@@ -7978,7 +8201,7 @@
* indirect can be completed when its children are free.
*/
if (needj) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
freework->fw_off = i;
freework->fw_ref += freedeps;
freework->fw_ref -= NINDIR(fs) + 1;
@@ -7986,7 +8209,7 @@
freeblks->fb_cgwait += freedeps;
if (freework->fw_ref == 0)
freework_freeblock(freework);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
/*
@@ -8001,9 +8224,9 @@
/* Non SUJ softdep does single-threaded truncations. */
if (freework->fw_blkno == dbn) {
freework->fw_state |= ALLCOMPLETE;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
handle_written_freework(freework);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
return;
}
@@ -8068,7 +8291,8 @@
* Create the mkdir dependencies for . and .. in a new directory. Link them
* in to a newdirblk so any subsequent additions are tracked properly. The
* caller is responsible for adding the mkdir1 dependency to the journal
- * and updating id_mkdiradd. This function returns with lk held.
+ * and updating id_mkdiradd. This function returns with the per-filesystem
+ * lock held.
*/
static struct mkdir *
setup_newdir(dap, newinum, dinum, newdirbp, mkdirp)
@@ -8081,13 +8305,15 @@
struct newblk *newblk;
struct pagedep *pagedep;
struct inodedep *inodedep;
- struct newdirblk *newdirblk = 0;
+ struct newdirblk *newdirblk;
struct mkdir *mkdir1, *mkdir2;
struct worklist *wk;
struct jaddref *jaddref;
+ struct ufsmount *ump;
struct mount *mp;
mp = dap->da_list.wk_mp;
+ ump = VFSTOUFS(mp);
newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK,
M_SOFTDEP_FLAGS);
workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp);
@@ -8110,8 +8336,8 @@
* Dependency on "." and ".." being written to disk.
*/
mkdir1->md_buf = newdirbp;
- ACQUIRE_LOCK(&lk);
- LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
+ ACQUIRE_LOCK(VFSTOUFS(mp));
+ LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir1, md_mkdirs);
/*
* We must link the pagedep, allocdirect, and newdirblk for
* the initial file page so the pointer to the new directory
@@ -8150,7 +8376,7 @@
KASSERT(jaddref != NULL && jaddref->ja_parent == newinum &&
(jaddref->ja_state & MKDIR_PARENT),
("setup_newdir: bad dotdot jaddref %p", jaddref));
- LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
+ LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs);
mkdir2->md_jaddref = jaddref;
jaddref->ja_mkdir = mkdir2;
} else if (inodedep == NULL ||
@@ -8157,8 +8383,9 @@
(inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
dap->da_state &= ~MKDIR_PARENT;
WORKITEM_FREE(mkdir2, D_MKDIR);
+ mkdir2 = NULL;
} else {
- LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
+ LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs);
WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list);
}
*mkdirp = mkdir2;
@@ -8205,12 +8432,17 @@
struct newblk *newblk;
struct pagedep *pagedep;
struct inodedep *inodedep;
- struct newdirblk *newdirblk = 0;
+ struct newdirblk *newdirblk;
struct mkdir *mkdir1, *mkdir2;
struct jaddref *jaddref;
+ struct ufsmount *ump;
struct mount *mp;
int isindir;
+ ump = dp->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_directory_add called on non-softdep filesystem"));
/*
* Whiteouts have no dependencies.
*/
@@ -8221,7 +8453,6 @@
}
jaddref = NULL;
mkdir1 = mkdir2 = NULL;
- mp = UFSTOVFS(dp->i_ump);
fs = dp->i_fs;
lbn = lblkno(fs, diroffset);
offset = blkoff(fs, diroffset);
@@ -8233,6 +8464,7 @@
dap->da_state = ATTACHED;
LIST_INIT(&dap->da_jwork);
isindir = bp->b_lblkno >= NDADDR;
+ newdirblk = NULL;
if (isnewblk &&
(isindir ? blkoff(fs, diroffset) : fragoff(fs, diroffset)) == 0) {
newdirblk = malloc(sizeof(struct newdirblk),
@@ -8247,7 +8479,7 @@
*/
if (newdirbp == NULL) {
dap->da_state |= DEPCOMPLETE;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
} else {
dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
mkdir1 = setup_newdir(dap, newinum, dp->i_number, newdirbp,
@@ -8265,7 +8497,7 @@
dap->da_pagedep = pagedep;
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
da_pdlist);
- inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
/*
* If we're journaling, link the diradd into the jaddref so it
* may be completed after the journal entry is written. Otherwise,
@@ -8323,7 +8555,7 @@
inodedep->id_mkdiradd = dap;
} else if (inodedep->id_mkdiradd)
merge_diradd(inodedep, dap);
- if (newdirblk) {
+ if (newdirblk != NULL) {
/*
* There is nothing to do if we are already tracking
* this block.
@@ -8330,7 +8562,7 @@
*/
if ((pagedep->pd_state & NEWBLOCK) != 0) {
WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk)
@@ -8340,7 +8572,7 @@
pagedep->pd_state |= NEWBLOCK;
pagedep->pd_newdirblk = newdirblk;
newdirblk->db_pagedep = pagedep;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* If we extended into an indirect, signal direnter to sync.
*/
@@ -8348,7 +8580,7 @@
return (1);
return (0);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -8378,6 +8610,9 @@
int flags;
mp = UFSTOVFS(dp->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_change_directoryentry_offset called on "
+ "non-softdep filesystem"));
de = (struct direct *)oldloc;
jmvref = NULL;
flags = 0;
@@ -8396,7 +8631,7 @@
offset = blkoff(dp->i_fs, dp->i_offset);
oldoffset = offset + (oldloc - base);
newoffset = offset + (newloc - base);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(dp->i_ump);
if (pagedep_lookup(mp, bp, dp->i_number, lbn, flags, &pagedep) == 0)
goto done;
dap = diradd_lookup(pagedep, oldoffset);
@@ -8418,7 +8653,7 @@
add_to_journal(&jmvref->jm_list);
}
bcopy(oldloc, newloc, entrysize);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -8434,6 +8669,7 @@
{
struct diradd *olddap;
struct mkdir *mkdir, *nextmd;
+ struct ufsmount *ump;
short state;
olddap = inodedep->id_mkdiradd;
@@ -8440,7 +8676,9 @@
inodedep->id_mkdiradd = newdap;
if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
newdap->da_state &= ~DEPCOMPLETE;
- for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
+ mkdir = nextmd) {
nextmd = LIST_NEXT(mkdir, md_mkdirs);
if (mkdir->md_diradd != olddap)
continue;
@@ -8499,6 +8737,7 @@
struct inodedep *inodedep;
struct jaddref *jaddref;
struct inoref *inoref;
+ struct ufsmount *ump;
struct mkdir *mkdir;
/*
@@ -8535,7 +8774,8 @@
* journaling.
*/
if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
- LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) {
+ ump = VFSTOUFS(dap->da_list.wk_mp);
+ LIST_FOREACH(mkdir, &ump->softdep_mkdirlisthd, md_mkdirs) {
if (mkdir->md_diradd != dap)
continue;
if ((jaddref = mkdir->md_jaddref) == NULL)
@@ -8580,8 +8820,10 @@
struct pagedep *pagedep;
struct inodedep *inodedep;
struct mkdir *mkdir, *nextmd;
+ struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(dap->da_list.wk_mp);
+ LOCK_OWNED(ump);
LIST_REMOVE(dap, da_pdlist);
if (dap->da_state & ONWORKLIST)
WORKLIST_REMOVE(&dap->da_list);
@@ -8600,7 +8842,8 @@
if (inodedep->id_mkdiradd == dap)
inodedep->id_mkdiradd = NULL;
if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
- for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
+ for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
+ mkdir = nextmd) {
nextmd = LIST_NEXT(mkdir, md_mkdirs);
if (mkdir->md_diradd != dap)
continue;
@@ -8655,6 +8898,8 @@
struct inodedep *inodedep;
int direct;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_setup_remove called on non-softdep filesystem"));
/*
* Allocate a new dirrem if appropriate and ACQUIRE_LOCK. We want
* newdirrem() to setup the full directory remove which requires
@@ -8686,7 +8931,7 @@
if ((dirrem->dm_state & COMPLETE) == 0) {
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
dm_next);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
} else {
if (prevdirrem != NULL)
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
@@ -8693,7 +8938,7 @@
prevdirrem, dm_next);
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
direct = LIST_EMPTY(&dirrem->dm_jremrefhd);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
if (direct)
handle_workitem_remove(dirrem, 0);
}
@@ -8765,6 +9010,7 @@
{
struct inodedep *inodedep;
struct jaddref *jaddref;
+ struct ufsmount *ump;
struct mkdir *mkdir;
struct diradd *dap;
@@ -8774,7 +9020,8 @@
dap = inodedep->id_mkdiradd;
if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0)
return (jremref);
- for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir;
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
mkdir = LIST_NEXT(mkdir, md_mkdirs))
if (mkdir->md_diradd == dap && mkdir->md_state & MKDIR_PARENT)
break;
@@ -8863,16 +9110,19 @@
panic("newdirrem: whiteout");
dvp = ITOV(dp);
/*
- * If we are over our limit, try to improve the situation.
+ * If the system is over its limit and our filesystem is
+ * responsible for more than our share of that usage and
+ * we are not a snapshot, request some inodedep cleanup.
* Limiting the number of dirrem structures will also limit
* the number of freefile and freeblks structures.
*/
- ACQUIRE_LOCK(&lk);
- if (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2)
- (void) request_cleanup(ITOV(dp)->v_mount, FLUSH_BLOCKS);
- FREE_LOCK(&lk);
- dirrem = malloc(sizeof(struct dirrem),
- M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO);
+ ACQUIRE_LOCK(ip->i_ump);
+ if (!IS_SNAPSHOT(ip) && softdep_excess_items(ip->i_ump, D_DIRREM))
+ schedule_cleanup(ITOV(dp)->v_mount);
+ else
+ FREE_LOCK(ip->i_ump);
+ dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS |
+ M_ZERO);
workitem_alloc(&dirrem->dm_list, D_DIRREM, dvp->v_mount);
LIST_INIT(&dirrem->dm_jremrefhd);
LIST_INIT(&dirrem->dm_jwork);
@@ -8900,7 +9150,7 @@
jremref = newjremref(dirrem, dp, ip, dp->i_offset,
ip->i_effnlink + 1);
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
lbn = lblkno(dp->i_fs, dp->i_offset);
offset = blkoff(dp->i_fs, dp->i_offset);
pagedep_lookup(UFSTOVFS(dp->i_ump), bp, dp->i_number, lbn, DEPALLOC,
@@ -8947,8 +9197,8 @@
if ((dap->da_state & ATTACHED) == 0)
panic("newdirrem: not ATTACHED");
if (dap->da_newinum != ip->i_number)
- panic("newdirrem: inum %d should be %d",
- ip->i_number, dap->da_newinum);
+ panic("newdirrem: inum %ju should be %ju",
+ (uintmax_t)ip->i_number, (uintmax_t)dap->da_newinum);
/*
* If we are deleting a changed name that never made it to disk,
* then return the dirrem describing the previous inode (which
@@ -9013,6 +9263,8 @@
offset = blkoff(dp->i_fs, dp->i_offset);
mp = UFSTOVFS(dp->i_ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_directory_change called on non-softdep filesystem"));
/*
* Whiteouts do not need diradd dependencies.
@@ -9060,7 +9312,7 @@
if (LIST_EMPTY(&dirrem->dm_jremrefhd))
add_to_worklist(&dirrem->dm_list, 0);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
return;
}
/*
@@ -9106,7 +9358,7 @@
* inode is not yet written. If it is written, do the post-inode
* write processing to put it on the id_pendinghd list.
*/
- inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
+ inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
if (MOUNTEDSUJ(mp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
inoreflst);
@@ -9134,7 +9386,7 @@
*/
if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET)
merge_diradd(inodedep, dap);
- FREE_LOCK(&lk);
+ FREE_LOCK(dp->i_ump);
}
/*
@@ -9148,17 +9400,16 @@
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
- int dflags;
- ACQUIRE_LOCK(&lk);
- dflags = DEPALLOC;
- if (IS_SNAPSHOT(ip))
- dflags |= NODELAY;
- inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_change_linkcnt called on non-softdep filesystem"));
+ ACQUIRE_LOCK(ip->i_ump);
+ inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC,
+ &inodedep);
if (ip->i_nlink < ip->i_effnlink)
panic("softdep_change_linkcnt: bad delta");
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
/*
@@ -9174,8 +9425,8 @@
struct sbdep *sbdep;
struct worklist *wk;
- if (MOUNTEDSUJ(UFSTOVFS(ump)) == 0)
- return;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
+ ("softdep_setup_sbupdate called on non-softdep filesystem"));
LIST_FOREACH(wk, &bp->b_dep, wk_list)
if (wk->wk_type == D_SBDEP)
break;
@@ -9185,9 +9436,9 @@
workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump));
sbdep->sb_fs = fs;
sbdep->sb_ump = ump;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -9201,7 +9452,7 @@
struct inodedep *inodedep;
struct inodedep *idp;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ump);
for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst);
inodedep; inodedep = idp) {
if ((inodedep->id_state & UNLINKNEXT) == 0)
@@ -9247,12 +9498,10 @@
struct buf *bp;
{
struct inodedep *inodedep;
- struct mount *mp;
struct fs *fs;
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(sbdep->sb_ump);
fs = sbdep->sb_fs;
- mp = UFSTOVFS(sbdep->sb_ump);
/*
* If the superblock doesn't match the in-memory list, start over.
*/
@@ -9292,10 +9541,10 @@
{
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
if (MOUNTEDSUJ(mp) == 0)
return;
- ump = VFSTOUFS(mp);
ump->um_fs->fs_fmod = 1;
if (inodedep->id_state & UNLINKED)
panic("unlinked_inodedep: %p already unlinked\n", inodedep);
@@ -9326,7 +9575,7 @@
ino = inodedep->id_ino;
error = 0;
for (;;) {
- mtx_assert(&lk, MA_OWNED);
+ LOCK_OWNED(ump);
KASSERT((inodedep->id_state & UNLINKED) != 0,
("clear_unlinked_inodedep: inodedep %p not unlinked",
inodedep));
@@ -9364,15 +9613,18 @@
pino = 0;
if (idp && (idp->id_state & UNLINKNEXT))
pino = idp->id_ino;
- FREE_LOCK(&lk);
- if (pino == 0)
+ FREE_LOCK(ump);
+ if (pino == 0) {
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
(int)fs->fs_sbsize, 0, 0, 0);
- else
+ } else {
error = bread(ump->um_devvp,
fsbtodb(fs, ino_to_fsba(fs, pino)),
(int)fs->fs_bsize, NOCRED, &bp);
- ACQUIRE_LOCK(&lk);
+ if (error)
+ brelse(bp);
+ }
+ ACQUIRE_LOCK(ump);
if (error)
break;
/* If the list has changed restart the loop. */
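(The added brelse() fixes a buffer leak: on 10-stable bread() still
returns with the buffer held even on failure, so the caller owns the
release. The idiomatic shape, sketched with the same call:

    error = bread(ump->um_devvp,
        fsbtodb(fs, ino_to_fsba(fs, pino)),
        (int)fs->fs_bsize, NOCRED, &bp);
    if (error) {
        brelse(bp);     /* bp is still held on failure */
        break;          /* or otherwise back out */
    }
)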
@@ -9382,9 +9634,9 @@
nino = idp->id_ino;
if (nino != pino ||
(inodedep->id_state & UNLINKPREV) != UNLINKPREV) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
brelse(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
}
nino = 0;
@@ -9400,7 +9652,7 @@
inodedep));
inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST);
TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* The predecessor's next pointer is manually updated here
* so that the NEXT flag is never cleared for an element
@@ -9422,13 +9674,13 @@
* filesystem is corrupted already.
*/
bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If the superblock pointer still needs to be cleared, force
* a write here.
*/
if (fs->fs_sujfree == ino) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
(int)fs->fs_sbsize, 0, 0, 0);
bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
@@ -9436,7 +9688,7 @@
softdep_setup_sbupdate(ump, (struct fs *)bp->b_data,
bp);
bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
if (fs->fs_sujfree != ino)
@@ -9478,7 +9730,7 @@
if (ffs_vgetf(mp, oldinum, flags, &vp, FFSV_FORCEINSMQ) != 0)
return (EBUSY);
ip = VTOI(vp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0)
panic("handle_workitem_remove: lost inodedep");
if (dirrem->dm_state & ONDEPLIST)
@@ -9520,7 +9772,7 @@
("handle_workitem_remove: worklist not empty. %s",
TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type)));
WORKITEM_FREE(dirrem, D_DIRREM);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
goto out;
}
/*
@@ -9546,7 +9798,7 @@
KASSERT(LIST_EMPTY(&dirrem->dm_jwork),
("handle_workitem_remove: DIRCHG and worklist not empty."));
WORKITEM_FREE(dirrem, D_DIRREM);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
goto out;
}
dirrem->dm_state = ONDEPLIST;
@@ -9567,12 +9819,12 @@
if (inodedep == NULL ||
(inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED ||
check_inode_unwritten(inodedep)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
vput(vp);
return handle_workitem_remove(dirrem, flags);
}
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
ip->i_flag |= IN_CHANGE;
out:
ffs_update(vp, 0);
@@ -9607,9 +9859,9 @@
ump = VFSTOUFS(freefile->fx_list.wk_mp);
fs = ump->um_fs;
#ifdef DEBUG
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (error)
panic("handle_workitem_freefile: inodedep %p survived", idp);
#endif
@@ -9621,9 +9873,9 @@
if ((error = ffs_freefile(ump, fs, freefile->fx_devvp,
freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0)
softdep_error("handle_workitem_freefile", error);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
WORKITEM_FREE(freefile, D_FREEFILE);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
@@ -9673,6 +9925,7 @@
struct freeblks *freeblks;
struct jblkdep *jblkdep;
struct newblk *newblk;
+ struct ufsmount *ump;
/*
* We only care about write operations. There should never
@@ -9685,10 +9938,13 @@
panic("softdep_disk_io_initiation: Writing buffer with "
"background write in progress: %p", bp);
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ return;
+ ump = VFSTOUFS(wk->wk_mp);
+
marker.wk_type = D_LAST + 1; /* Not a normal workitem */
PHOLD(curproc); /* Don't swap out kernel stack */
-
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* Do any necessary pre-I/O processing.
*/
@@ -9772,7 +10028,7 @@
/* NOTREACHED */
}
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
PRELE(curproc); /* Allow swapout of kernel stack */
}
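(With no global lock left, I/O initiation must first discover which
filesystem's lock covers this buffer. The first entry on b_dep
supplies it, since every dependency attached to one buffer belongs to
the same mount, and a buffer with no dependencies can return before
taking any lock at all; deallocate_dependencies() above grew the same
early exit. The recurring shape:

    if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
        return;                 /* no softdep work on this buf */
    ump = VFSTOUFS(wk->wk_mp);  /* any entry yields the same mount */
    ACQUIRE_LOCK(ump);
)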
@@ -9808,8 +10064,8 @@
* Wait for all journal remove dependencies to hit the disk.
* We can not allow any potentially conflicting directory adds
* to be visible before removes and rollback is too difficult.
- * lk may be dropped and re-acquired, however we hold the buf
- * locked so the dependency can not go away.
+ * The per-filesystem lock may be dropped and re-acquired, however
+ * we hold the buf locked so the dependency can not go away.
*/
LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next)
while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL)
@@ -9821,9 +10077,10 @@
ep = (struct direct *)
((char *)bp->b_data + dap->da_offset);
if (ep->d_ino != dap->da_newinum)
- panic("%s: dir inum %d != new %d",
+ panic("%s: dir inum %ju != new %ju",
"initiate_write_filepage",
- ep->d_ino, dap->da_newinum);
+ (uintmax_t)ep->d_ino,
+ (uintmax_t)dap->da_newinum);
if (dap->da_state & DIRCHG)
ep->d_ino = dap->da_previous->dm_oldinum;
else
@@ -9853,6 +10110,7 @@
struct ufs1_dinode *dp;
struct ufs1_dinode *sip;
struct inoref *inoref;
+ struct ufsmount *ump;
struct fs *fs;
ufs_lbn_t i;
#ifdef INVARIANTS
@@ -9864,6 +10122,8 @@
panic("initiate_write_inodeblock_ufs1: already started");
inodedep->id_state |= IOSTARTED;
fs = inodedep->id_fs;
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ LOCK_OWNED(ump);
dp = (struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, inodedep->id_ino);
@@ -9884,10 +10144,10 @@
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
if (inodedep->id_savedino1 != NULL)
panic("initiate_write_inodeblock_ufs1: I/O underway");
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
sip = malloc(sizeof(struct ufs1_dinode),
M_SAVEDINO, M_SOFTDEP_FLAGS);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
inodedep->id_savedino1 = sip;
*inodedep->id_savedino1 = *dp;
bzero((caddr_t)dp, sizeof(struct ufs1_dinode));
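(The unlock/alloc/relock dance around the M_SAVEDINO allocation is the
standard softdep pattern: M_SOFTDEP_FLAGS includes M_WAITOK, so
malloc() may sleep and the per-filesystem lock cannot be held across
it. Written as a hypothetical helper, not present in the source:

    static void *
    softdep_malloc_unlocked(struct ufsmount *ump, size_t size,
        struct malloc_type *type)
    {
        void *p;

        FREE_LOCK(ump);
        p = malloc(size, type, M_SOFTDEP_FLAGS);
        ACQUIRE_LOCK(ump);
        return (p);     /* caller must revalidate prior checks */
    }
)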
@@ -10020,6 +10280,7 @@
struct ufs2_dinode *dp;
struct ufs2_dinode *sip;
struct inoref *inoref;
+ struct ufsmount *ump;
struct fs *fs;
ufs_lbn_t i;
#ifdef INVARIANTS
@@ -10031,6 +10292,8 @@
panic("initiate_write_inodeblock_ufs2: already started");
inodedep->id_state |= IOSTARTED;
fs = inodedep->id_fs;
+ ump = VFSTOUFS(inodedep->id_list.wk_mp);
+ LOCK_OWNED(ump);
dp = (struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, inodedep->id_ino);
@@ -10051,10 +10314,10 @@
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
if (inodedep->id_savedino2 != NULL)
panic("initiate_write_inodeblock_ufs2: I/O underway");
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
sip = malloc(sizeof(struct ufs2_dinode),
M_SAVEDINO, M_SOFTDEP_FLAGS);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
inodedep->id_savedino2 = sip;
*inodedep->id_savedino2 = *dp;
bzero((caddr_t)dp, sizeof(struct ufs2_dinode));
@@ -10265,18 +10528,17 @@
LIST_REMOVE(indirdep, ir_next);
}
indirdep->ir_state |= GOINGAWAY;
- VFSTOUFS(indirdep->ir_list.wk_mp)->um_numindirdeps += 1;
/*
* Pass in bp for blocks still have journal writes
* pending so we can cancel them on their own.
*/
- while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != NULL)
cancel_allocindir(aip, bp, freeblks, 0);
- while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL)
cancel_allocindir(aip, NULL, freeblks, 0);
- while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL)
cancel_allocindir(aip, NULL, freeblks, 0);
- while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 0)
+ while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL)
cancel_allocindir(aip, NULL, freeblks, 0);
/*
* If there are pending partial truncations we need to keep the
@@ -10331,6 +10593,7 @@
struct indirdep *indirdep;
struct buf *bp;
{
+ struct ufsmount *ump;
indirdep->ir_state |= IOSTARTED;
if (indirdep->ir_state & GOINGAWAY)
@@ -10346,10 +10609,12 @@
* Replace up-to-date version with safe version.
*/
if (indirdep->ir_saveddata == NULL) {
- FREE_LOCK(&lk);
+ ump = VFSTOUFS(indirdep->ir_list.wk_mp);
+ LOCK_OWNED(ump);
+ FREE_LOCK(ump);
indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP,
M_SOFTDEP_FLAGS);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
indirdep->ir_state &= ~ATTACHED;
indirdep->ir_state |= UNDONE;
@@ -10371,19 +10636,24 @@
{
struct worklist *wk, *wkn;
struct inodedep *inodedep;
+ struct ufsmount *ump;
uint8_t *inosused;
struct cg *cgp;
struct fs *fs;
- ACQUIRE_LOCK(&lk);
- fs = VFSTOUFS(mp)->um_fs;
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_setup_inofree called on non-softdep filesystem"));
+ ump = VFSTOUFS(mp);
+ ACQUIRE_LOCK(ump);
+ fs = ump->um_fs;
cgp = (struct cg *)bp->b_data;
inosused = cg_inosused(cgp);
if (isset(inosused, ino % fs->fs_ipg))
- panic("softdep_setup_inofree: inode %d not freed.", ino);
+ panic("softdep_setup_inofree: inode %ju not freed.",
+ (uintmax_t)ino);
if (inodedep_lookup(mp, ino, 0, &inodedep))
- panic("softdep_setup_inofree: ino %d has existing inodedep %p",
- ino, inodedep);
+ panic("softdep_setup_inofree: ino %ju has existing inodedep %p",
+ (uintmax_t)ino, inodedep);
if (wkhd) {
LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) {
if (wk->wk_type != D_JADDREF)
@@ -10393,13 +10663,13 @@
* We can free immediately even if the jaddref
* isn't attached in a background write as now
* the bitmaps are reconciled.
- */
+ */
wk->wk_state |= COMPLETE | ATTACHED;
free_jaddref(WK_JADDREF(wk));
}
jwork_move(&bp->b_dep, wkhd);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
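
The KASSERT added above recurs across the exported entry points in this revision; where a caller can legitimately arrive on a non-softdep mount, the function instead degrades to an early return. Both guard shapes, condensed as a sketch rather than quoted from any one hunk:

	/* Internal interface: a non-softdep caller indicates a bug. */
	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
	    ("softdep routine called on non-softdep filesystem"));

	/* Public interface: quietly do nothing without soft updates. */
	if (MOUNTEDSOFTDEP(mp) == 0)
		return (0);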
@@ -10419,6 +10689,7 @@
{
struct bmsafemap *bmsafemap;
struct jnewblk *jnewblk;
+ struct ufsmount *ump;
struct worklist *wk;
struct fs *fs;
#ifdef SUJ_DEBUG
@@ -10435,9 +10706,12 @@
"softdep_setup_blkfree: blkno %jd frags %d wk head %p",
blkno, frags, wkhd);
- ACQUIRE_LOCK(&lk);
+ ump = VFSTOUFS(mp);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
+ ("softdep_setup_blkfree called on non-softdep filesystem"));
+ ACQUIRE_LOCK(ump);
/* Lookup the bmsafemap so we track when it is dirty. */
- fs = VFSTOUFS(mp)->um_fs;
+ fs = ump->um_fs;
bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL);
/*
* Detach any jnewblks which have been canceled. They must linger
@@ -10511,7 +10785,7 @@
}
}
#endif
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -10518,7 +10792,7 @@
* Revert a block allocation when the journal record that describes it
* is not yet written.
*/
-int
+static int
jnewblk_rollback(jnewblk, fs, cgp, blksfree)
struct jnewblk *jnewblk;
struct fs *fs;
@@ -10563,7 +10837,7 @@
/* Add back in counts associated with the new frags */
blk = blkmap(fs, blksfree, bbase);
ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
- /* If a complete block has been reassembled, account for it. */
+ /* If a complete block has been reassembled, account for it. */
fragno = fragstoblks(fs, bbase);
if (ffs_isblock(fs, blksfree, fragno)) {
cgp->cg_cs.cs_nffree -= fs->fs_frag;
@@ -10578,7 +10852,7 @@
return (frags);
}
-static void
+static void
initiate_write_bmsafemap(bmsafemap, bp)
struct bmsafemap *bmsafemap;
struct buf *bp; /* The cg block. */
@@ -10591,6 +10865,10 @@
struct fs *fs;
ino_t ino;
+ /*
+ * If this is a background write, we did this at the time that
+ * the copy was made, so we do not need to do it again.
+ */
if (bmsafemap->sm_state & IOSTARTED)
return;
bmsafemap->sm_state |= IOSTARTED;
@@ -10612,8 +10890,8 @@
jaddref->ja_state |= UNDONE;
stat_jaddref++;
} else
- panic("initiate_write_bmsafemap: inode %d "
- "marked free", jaddref->ja_ino);
+ panic("initiate_write_bmsafemap: inode %ju "
+ "marked free", (uintmax_t)jaddref->ja_ino);
}
}
/*
@@ -10657,6 +10935,7 @@
{
struct worklist *wk;
struct worklist *owk;
+ struct ufsmount *ump;
struct workhead reattach;
struct freeblks *freeblks;
struct buf *sbp;
@@ -10663,10 +10942,42 @@
/*
* If an error occurred while doing the write, then the data
- * has not hit the disk and the dependencies cannot be unrolled.
+ * has not hit the disk and the dependencies cannot be processed.
+ * But we do have to go through and roll forward any dependencies
+ * that were rolled back before the disk write.
*/
- if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0)
+ if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) {
+ LIST_FOREACH(wk, &bp->b_dep, wk_list) {
+ switch (wk->wk_type) {
+
+ case D_PAGEDEP:
+ handle_written_filepage(WK_PAGEDEP(wk), bp, 0);
+ continue;
+
+ case D_INODEDEP:
+ handle_written_inodeblock(WK_INODEDEP(wk),
+ bp, 0);
+ continue;
+
+ case D_BMSAFEMAP:
+ handle_written_bmsafemap(WK_BMSAFEMAP(wk),
+ bp, 0);
+ continue;
+
+ case D_INDIRDEP:
+ handle_written_indirdep(WK_INDIRDEP(wk),
+ bp, &sbp, 0);
+ continue;
+ default:
+ /* nothing to roll forward */
+ continue;
+ }
+ }
return;
+ }
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ return;
+ ump = VFSTOUFS(wk->wk_mp);
LIST_INIT(&reattach);
/*
* This lock must not be released anywhere in this code segment.
@@ -10673,10 +10984,10 @@
*/
sbp = NULL;
owk = NULL;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
WORKLIST_REMOVE(wk);
- dep_write[wk->wk_type]++;
+ atomic_add_long(&dep_write[wk->wk_type], 1);
if (wk == owk)
panic("duplicate worklist: %p\n", wk);
owk = wk;
@@ -10683,17 +10994,20 @@
switch (wk->wk_type) {
case D_PAGEDEP:
- if (handle_written_filepage(WK_PAGEDEP(wk), bp))
+ if (handle_written_filepage(WK_PAGEDEP(wk), bp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
case D_INODEDEP:
- if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
+ if (handle_written_inodeblock(WK_INODEDEP(wk), bp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
case D_BMSAFEMAP:
- if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp))
+ if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
@@ -10712,7 +11026,8 @@
continue;
case D_INDIRDEP:
- if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp))
+ if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp,
+ WRITESUCCEEDED))
WORKLIST_INSERT(&reattach, wk);
continue;
@@ -10758,7 +11073,7 @@
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(&bp->b_dep, wk);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (sbp)
brelse(sbp);
}
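
softdep_disk_write_complete now runs even for failed writes: the error path rolls forward the four buffer-resident dependency types with WRITESUCCEEDED clear, so the buffer can later be rewritten from consistent state. The handler contract, as a sketch with a hypothetical handler and dependency type:

	static int
	example_handle_written(struct example_dep *dep, struct buf *bp, int flags)
	{
		/* Always roll forward whatever was rolled back for the I/O. */
		if ((flags & WRITESUCCEEDED) == 0) {
			bdirty(bp);	/* failed write: keep buffer dirty, retry */
			return (1);	/* non-zero: leave dependency attached */
		}
		/* Only a successful write may complete and free dependencies. */
		return (0);
	}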
@@ -11012,12 +11327,17 @@
* Called from within softdep_disk_write_complete above to restore
* in-memory inode block contents to their most up-to-date state. Note
* that this routine is always called from interrupt level with further
- * splbio interrupts blocked.
+ * interrupts from this device blocked.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
*/
static int
-handle_written_inodeblock(inodedep, bp)
+handle_written_inodeblock(inodedep, bp, flags)
struct inodedep *inodedep;
struct buf *bp; /* buffer containing the inode block */
+ int flags;
{
struct freefile *freefile;
struct allocdirect *adp, *nextadp;
@@ -11047,7 +11367,8 @@
/*
* Leave this inodeblock dirty until it's in the list.
*/
- if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED) {
+ if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED &&
+ (flags & WRITESUCCEEDED)) {
struct inodedep *inon;
inon = TAILQ_NEXT(inodedep, id_unlinked);
@@ -11086,7 +11407,8 @@
goto bufwait;
return (1);
}
- inodedep->id_state |= COMPLETE;
+ if (flags & WRITESUCCEEDED)
+ inodedep->id_state |= COMPLETE;
/*
* Roll forward anything that had to be rolled back before
* the inode could be updated.
@@ -11201,6 +11523,13 @@
bdirty(bp);
bufwait:
/*
+ * If the write did not succeed, we have done all the roll-forward
+ * operations, but we cannot take the actions that will allow its
+ * dependencies to be processed.
+ */
+ if ((flags & WRITESUCCEEDED) == 0)
+ return (hadchanges);
+ /*
* Process any allocdirects that completed during the update.
*/
if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
@@ -11257,11 +11586,20 @@
return (hadchanges);
}
+/*
+ * Perform needed roll-forwards and kick off any dependencies that
+ * can now be processed.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
+ */
static int
-handle_written_indirdep(indirdep, bp, bpp)
+handle_written_indirdep(indirdep, bp, bpp, flags)
struct indirdep *indirdep;
struct buf *bp;
struct buf **bpp;
+ int flags;
{
struct allocindir *aip;
struct buf *sbp;
@@ -11286,11 +11624,21 @@
indirdep->ir_state &= ~(UNDONE | IOSTARTED);
indirdep->ir_state |= ATTACHED;
/*
+ * If the write did not succeed, we have done all the roll-forward
+ * operations, but we cannot take the actions that will allow its
+ * dependencies to be processed.
+ */
+ if ((flags & WRITESUCCEEDED) == 0) {
+ stat_indir_blk_ptrs++;
+ bdirty(bp);
+ return (1);
+ }
+ /*
* Move allocindirs with written pointers to the completehd if
* the indirdep's pointer is not yet written. Otherwise
* free them here.
*/
- while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) {
+ while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL) {
LIST_REMOVE(aip, ai_next);
if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
LIST_INSERT_HEAD(&indirdep->ir_completehd, aip,
@@ -11305,7 +11653,7 @@
* the done list to the write list after updating the pointers.
*/
if (TAILQ_EMPTY(&indirdep->ir_trunc)) {
- while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
+ while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL) {
handle_allocindir_partdone(aip);
if (aip == LIST_FIRST(&indirdep->ir_donehd))
panic("disk_write_complete: not gone");
@@ -11358,8 +11706,8 @@
}
/*
- * Returns true if the bmsafemap will have rollbacks when written. Must
- * only be called with lk and the buf lock on the cg held.
+ * Returns true if the bmsafemap will have rollbacks when written. Must only
+ * be called with the per-filesystem lock and the buf lock on the cg held.
*/
static int
bmsafemap_backgroundwrite(bmsafemap, bp)
@@ -11368,6 +11716,7 @@
{
int dirty;
+ LOCK_OWNED(VFSTOUFS(bmsafemap->sm_list.wk_mp));
dirty = !LIST_EMPTY(&bmsafemap->sm_jaddrefhd) |
!LIST_EMPTY(&bmsafemap->sm_jnewblkhd);
/*
@@ -11438,11 +11787,16 @@
* Complete a write to a bmsafemap structure. Roll forward any bitmap
* changes if it's not a background write. Set all written dependencies
* to DEPCOMPLETE and free the structure if possible.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
*/
static int
-handle_written_bmsafemap(bmsafemap, bp)
+handle_written_bmsafemap(bmsafemap, bp, flags)
struct bmsafemap *bmsafemap;
struct buf *bp;
+ int flags;
{
struct newblk *newblk;
struct inodedep *inodedep;
@@ -11454,17 +11808,24 @@
struct cg *cgp;
struct fs *fs;
ino_t ino;
+ int foreground;
int chgs;
if ((bmsafemap->sm_state & IOSTARTED) == 0)
- panic("initiate_write_bmsafemap: Not started\n");
+ panic("handle_written_bmsafemap: Not started\n");
ump = VFSTOUFS(bmsafemap->sm_list.wk_mp);
chgs = 0;
bmsafemap->sm_state &= ~IOSTARTED;
+ foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0;
/*
- * Release journal work that was waiting on the write.
+ * If write was successful, release journal work that was waiting
+ * on the write. Otherwise move the work back.
*/
- handle_jwork(&bmsafemap->sm_freewr);
+ if (flags & WRITESUCCEEDED)
+ handle_jwork(&bmsafemap->sm_freewr);
+ else
+ LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
+ worklist, wk_list);
/*
* Restore unwritten inode allocation pending jaddref writes.
@@ -11481,7 +11842,8 @@
if (isset(inosused, ino))
panic("handle_written_bmsafemap: "
"re-allocated inode");
- if ((bp->b_xflags & BX_BKGRDMARKER) == 0) {
+ /* Do the roll-forward only if it's a real copy. */
+ if (foreground) {
if ((jaddref->ja_mode & IFMT) == IFDIR)
cgp->cg_cs.cs_ndir++;
cgp->cg_cs.cs_nifree--;
@@ -11504,7 +11866,8 @@
jntmp) {
if ((jnewblk->jn_state & UNDONE) == 0)
continue;
- if ((bp->b_xflags & BX_BKGRDMARKER) == 0 &&
+ /* Do the roll-forward only if it's a real copy. */
+ if (foreground &&
jnewblk_rollforward(jnewblk, fs, cgp, blksfree))
chgs = 1;
jnewblk->jn_state &= ~(UNDONE | NEWBLOCK);
@@ -11512,6 +11875,20 @@
free_jnewblk(jnewblk);
}
}
+ /*
+ * If the write did not succeed, we have done all the roll-forward
+ * operations, but we cannot take the actions that will allow its
+ * dependencies to be processed.
+ */
+ if ((flags & WRITESUCCEEDED) == 0) {
+ LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr,
+ newblk, nb_deps);
+ LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
+ worklist, wk_list);
+ if (foreground)
+ bdirty(bp);
+ return (1);
+ }
while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) {
newblk->nb_state |= DEPCOMPLETE;
newblk->nb_state &= ~ONDEPLIST;
@@ -11544,7 +11921,8 @@
return (0);
}
LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next);
- bdirty(bp);
+ if (foreground)
+ bdirty(bp);
return (1);
}
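
The foreground test above captures the background-write subtlety: a cg buffer written by the background mechanism is a throwaway copy (BX_BKGRDMARKER set), so rolling live bitmap bits forward and any bdirty() must apply only to the real buffer. In brief:

	foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0;
	/* roll bitmap bits forward only if this is the real cg buffer */
	if (foreground)
		bdirty(bp);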
@@ -11614,12 +11992,17 @@
* A write operation was just completed. Removed inodes can
* now be freed and associated block pointers may be committed.
* Note that this routine is always called from interrupt level
- * with further splbio interrupts blocked.
+ * with further interrupts from this device blocked.
+ *
+ * If the write did not succeed, we will do all the roll-forward
+ * operations, but we will not take the actions that will allow its
+ * dependencies to be processed.
*/
static int
-handle_written_filepage(pagedep, bp)
+handle_written_filepage(pagedep, bp, flags)
struct pagedep *pagedep;
struct buf *bp; /* buffer containing the written page */
+ int flags;
{
struct dirrem *dirrem;
struct diradd *dap, *nextdap;
@@ -11629,6 +12012,8 @@
if ((pagedep->pd_state & IOSTARTED) == 0)
panic("handle_written_filepage: not started");
pagedep->pd_state &= ~IOSTARTED;
+ if ((flags & WRITESUCCEEDED) == 0)
+ goto rollforward;
/*
* Process any directory removals that have been committed.
*/
@@ -11648,6 +12033,7 @@
if ((pagedep->pd_state & NEWBLOCK) == 0)
while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
free_diradd(dap, NULL);
+rollforward:
/*
* Uncommitted directory entries must be restored.
*/
@@ -11680,7 +12066,7 @@
* marked dirty so that it will eventually get written back in
* its correct form.
*/
- if (chgs) {
+ if (chgs || (flags & WRITESUCCEEDED) == 0) {
if ((bp->b_flags & B_DELWRI) == 0)
stat_dir_entry++;
bdirty(bp);
@@ -11719,18 +12105,20 @@
{
struct inodedep *inodedep;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_load_inodeblock called on non-softdep filesystem"));
/*
* Check for alternate nlink count.
*/
ip->i_effnlink = ip->i_nlink;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0,
&inodedep) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return;
}
ip->i_effnlink -= inodedep->id_nlinkdelta;
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
}
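
softdep_load_inodeblock is where the effective link count diverges from the on-disk count while removals are still pending. A worked example with assumed numbers: the disk inode records nlink == 3, but two unlinks are still queued in the inodedep (id_nlinkdelta == 2):

	ip->i_effnlink = ip->i_nlink;			/* 3, as read from disk */
	ip->i_effnlink -= inodedep->id_nlinkdelta;	/* 3 - 2 = 1 live name */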
/*
@@ -11751,6 +12139,7 @@
{
struct inodedep *inodedep;
struct inoref *inoref;
+ struct ufsmount *ump;
struct worklist *wk;
struct mount *mp;
struct buf *ibp;
@@ -11757,7 +12146,10 @@
struct fs *fs;
int error;
- mp = UFSTOVFS(ip->i_ump);
+ ump = ip->i_ump;
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_update_inodeblock called on non-softdep filesystem"));
fs = ip->i_fs;
/*
* Preserve the freelink that is on disk. clear_unlinked_inodedep()
@@ -11777,10 +12169,10 @@
* if there is no existing inodedep, then there are no dependencies
* to track.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
again:
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (ip->i_effnlink != ip->i_nlink)
panic("softdep_update_inodeblock: bad link count");
return;
@@ -11839,16 +12231,16 @@
* to be written so that the update can be done.
*/
if (waitfor == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
retry:
if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) != 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
ibp = inodedep->id_bmsafemap->sm_buf;
- ibp = getdirtybuf(ibp, &lk, MNT_WAIT);
+ ibp = getdirtybuf(ibp, LOCK_PTR(ump), MNT_WAIT);
if (ibp == NULL) {
/*
* If ibp came back as NULL, the dependency could have been
@@ -11857,10 +12249,10 @@
*/
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
goto retry;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(ibp)) != 0)
softdep_error("softdep_update_inodeblock: bwrite", error);
}
@@ -11909,6 +12301,7 @@
struct inodedep *inodedep;
struct pagedep *pagedep;
struct inoref *inoref;
+ struct ufsmount *ump;
struct worklist *wk;
struct diradd *dap;
struct mount *mp;
@@ -11923,11 +12316,14 @@
ip = VTOI(vp);
fs = ip->i_fs;
+ ump = ip->i_ump;
mp = vp->v_mount;
- ACQUIRE_LOCK(&lk);
+ if (MOUNTEDSOFTDEP(mp) == 0)
+ return (0);
+ ACQUIRE_LOCK(ump);
restart:
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
@@ -11985,7 +12381,7 @@
* the lock on our parent. See the comment in ufs_lookup
* for details on possible races.
*/
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp,
FFSV_FORCEINSMQ)) {
error = vfs_busy(mp, MBF_NOWAIT);
@@ -12030,7 +12426,7 @@
vput(pvp);
return (error);
}
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
locked = 1;
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) {
if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) != NULL) {
@@ -12043,7 +12439,7 @@
else
pagedep = dap->da_pagedep;
pagedep_new_block = pagedep->pd_state & NEWBLOCK;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
locked = 0;
if (pagedep_new_block && (error =
ffs_syncvnode(pvp, MNT_WAIT, 0))) {
@@ -12053,7 +12449,7 @@
}
}
if (locked)
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
* Flush directory page containing the inode's name.
@@ -12067,11 +12463,11 @@
vput(pvp);
if (error != 0)
return (error);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)
break;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (0);
}
@@ -12095,7 +12491,6 @@
bo = &vp->v_bufobj;
restart:
BO_LOCK(bo);
- ACQUIRE_LOCK(&lk);
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
/*
* If it is already scheduled, skip to the next buffer.
@@ -12115,13 +12510,11 @@
BUF_UNLOCK(bp);
continue;
}
- FREE_LOCK(&lk);
BO_UNLOCK(bo);
bremfree(bp);
(void) bawrite(bp);
goto restart;
}
- FREE_LOCK(&lk);
drain_output(vp);
BO_UNLOCK(bo);
}
@@ -12128,7 +12521,7 @@
/*
* Sync all cylinder groups that were dirty at the time this function is
- * called. Newly dirtied cgs will be inserted before the sintenel. This
+ * called. Newly dirtied cgs will be inserted before the sentinel. This
* is used to flush freedep activity that may be holding up writes to an
* indirect block.
*/
@@ -12138,25 +12531,25 @@
int waitfor;
{
struct bmsafemap *bmsafemap;
- struct bmsafemap *sintenel;
+ struct bmsafemap *sentinel;
struct ufsmount *ump;
struct buf *bp;
int error;
- sintenel = malloc(sizeof(*sintenel), M_BMSAFEMAP, M_ZERO | M_WAITOK);
- sintenel->sm_cg = -1;
+ sentinel = malloc(sizeof(*sentinel), M_BMSAFEMAP, M_ZERO | M_WAITOK);
+ sentinel->sm_cg = -1;
ump = VFSTOUFS(mp);
error = 0;
- ACQUIRE_LOCK(&lk);
- LIST_INSERT_HEAD(&ump->softdep_dirtycg, sintenel, sm_next);
- for (bmsafemap = LIST_NEXT(sintenel, sm_next); bmsafemap != NULL;
- bmsafemap = LIST_NEXT(sintenel, sm_next)) {
- /* Skip sintenels and cgs with no work to release. */
+ ACQUIRE_LOCK(ump);
+ LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinel, sm_next);
+ for (bmsafemap = LIST_NEXT(sentinel, sm_next); bmsafemap != NULL;
+ bmsafemap = LIST_NEXT(sentinel, sm_next)) {
+ /* Skip sentinels and cgs with no work to release. */
if (bmsafemap->sm_cg == -1 ||
(LIST_EMPTY(&bmsafemap->sm_freehd) &&
LIST_EMPTY(&bmsafemap->sm_freewr))) {
- LIST_REMOVE(sintenel, sm_next);
- LIST_INSERT_AFTER(bmsafemap, sintenel, sm_next);
+ LIST_REMOVE(sentinel, sm_next);
+ LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
continue;
}
/*
@@ -12163,25 +12556,25 @@
* If we don't get the lock and we're waiting try again, if
* not move on to the next buf and try to sync it.
*/
- bp = getdirtybuf(bmsafemap->sm_buf, &lk, waitfor);
+ bp = getdirtybuf(bmsafemap->sm_buf, LOCK_PTR(ump), waitfor);
if (bp == NULL && waitfor == MNT_WAIT)
continue;
- LIST_REMOVE(sintenel, sm_next);
- LIST_INSERT_AFTER(bmsafemap, sintenel, sm_next);
+ LIST_REMOVE(sentinel, sm_next);
+ LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
if (bp == NULL)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (waitfor == MNT_NOWAIT)
bawrite(bp);
else
error = bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (error)
break;
}
- LIST_REMOVE(sintenel, sm_next);
- FREE_LOCK(&lk);
- free(sintenel, M_BMSAFEMAP);
+ LIST_REMOVE(sentinel, sm_next);
+ FREE_LOCK(ump);
+ free(sentinel, M_BMSAFEMAP);
return (error);
}
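
The sentinel technique above is worth calling out: because the per-filesystem lock is dropped around each bwrite(), the dirty-cg list can change underfoot, so a dummy element (sm_cg == -1) is walked down the list and iteration always resumes at LIST_NEXT(sentinel). The skeleton of the idiom, simplified from the hunk:

	LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinel, sm_next);
	while ((bmsafemap = LIST_NEXT(sentinel, sm_next)) != NULL) {
		/* advance the sentinel past the element before sleeping */
		LIST_REMOVE(sentinel, sm_next);
		LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
		/* drop lock, write bmsafemap->sm_buf, re-acquire lock */
	}
	LIST_REMOVE(sentinel, sm_next);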
@@ -12193,25 +12586,29 @@
int
softdep_sync_metadata(struct vnode *vp)
{
+ struct inode *ip;
int error;
+ ip = VTOI(vp);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_sync_metadata called on non-softdep filesystem"));
/*
* Ensure that any direct block dependencies have been cleared,
* truncations are started, and inode references are journaled.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ip->i_ump);
/*
* Write all journal records to prevent rollbacks on devvp.
*/
if (vp->v_type == VCHR)
softdep_flushjournal(vp->v_mount);
- error = flush_inodedep_deps(vp, vp->v_mount, VTOI(vp)->i_number);
+ error = flush_inodedep_deps(vp, vp->v_mount, ip->i_number);
/*
* Ensure that all truncates are written so we won't find deps on
* indirect blocks.
*/
process_truncates(vp);
- FREE_LOCK(&lk);
+ FREE_LOCK(ip->i_ump);
return (error);
}
@@ -12230,10 +12627,13 @@
struct pagedep *pagedep;
struct allocindir *aip;
struct newblk *newblk;
+ struct ufsmount *ump;
struct buf *nbp;
struct worklist *wk;
int i, error;
+ KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
+ ("softdep_sync_buf called on non-softdep filesystem"));
/*
* For VCHR we just don't want to force flush any dependencies that
* will cause rollbacks.
@@ -12243,7 +12643,8 @@
return (EBUSY);
return (0);
}
- ACQUIRE_LOCK(&lk);
+ ump = VTOI(vp)->i_ump;
+ ACQUIRE_LOCK(ump);
/*
* As we hold the buffer locked, none of its dependencies
* will disappear.
@@ -12268,13 +12669,13 @@
waitfor == MNT_NOWAIT)
continue;
nbp = newblk->nb_bmsafemap->sm_buf;
- nbp = getdirtybuf(nbp, &lk, waitfor);
+ nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor);
if (nbp == NULL)
goto top;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(nbp)) != 0)
goto out;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
case D_INDIRDEP:
@@ -12299,13 +12700,13 @@
if (newblk->nb_state & DEPCOMPLETE)
continue;
nbp = newblk->nb_bmsafemap->sm_buf;
- nbp = getdirtybuf(nbp, &lk, waitfor);
+ nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor);
if (nbp == NULL)
goto restart;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(nbp)) != 0)
goto out;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
goto restart;
}
continue;
@@ -12356,7 +12757,7 @@
}
}
out_unlock:
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
out:
return (error);
}
@@ -12373,6 +12774,7 @@
{
struct inodedep *inodedep;
struct inoref *inoref;
+ struct ufsmount *ump;
int error, waitfor;
/*
@@ -12388,11 +12790,13 @@
* We give a brief window at the top of the loop to allow
* any pending I/O to complete.
*/
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
for (error = 0, waitfor = MNT_NOWAIT; ; ) {
if (error)
return (error);
- FREE_LOCK(&lk);
- ACQUIRE_LOCK(&lk);
+ FREE_LOCK(ump);
+ ACQUIRE_LOCK(ump);
restart:
if (inodedep_lookup(mp, ino, 0, &inodedep) == 0)
return (0);
@@ -12435,9 +12839,13 @@
{
struct allocdirect *adp;
struct newblk *newblk;
+ struct ufsmount *ump;
struct buf *bp;
- mtx_assert(&lk, MA_OWNED);
+ if ((adp = TAILQ_FIRST(listhead)) == NULL)
+ return (0);
+ ump = VFSTOUFS(adp->ad_list.wk_mp);
+ LOCK_OWNED(ump);
TAILQ_FOREACH(adp, listhead, ad_next) {
newblk = (struct newblk *)adp;
if (newblk->nb_jnewblk != NULL) {
@@ -12447,18 +12855,18 @@
if (newblk->nb_state & DEPCOMPLETE)
continue;
bp = newblk->nb_bmsafemap->sm_buf;
- bp = getdirtybuf(bp, &lk, waitfor);
+ bp = getdirtybuf(bp, LOCK_PTR(ump), waitfor);
if (bp == NULL) {
if (waitfor == MNT_NOWAIT)
continue;
return (1);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (waitfor == MNT_NOWAIT)
bawrite(bp);
else
*errorp = bwrite(bp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return (1);
}
return (0);
@@ -12474,6 +12882,7 @@
ufs_lbn_t lbn;
{
struct newblk *newblk;
+ struct ufsmount *ump;
struct bufobj *bo;
struct inode *ip;
struct buf *bp;
@@ -12486,7 +12895,8 @@
blkno = DIP(ip, i_db[lbn]);
if (blkno == 0)
panic("flush_newblk_dep: Missing block");
- ACQUIRE_LOCK(&lk);
+ ump = VFSTOUFS(mp);
+ ACQUIRE_LOCK(ump);
/*
* Loop until all dependencies related to this block are satisfied.
* We must be careful to restart after each sleep in case a write
@@ -12494,7 +12904,7 @@
*/
for (;;) {
if (newblk_lookup(mp, blkno, 0, &newblk) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
break;
}
if (newblk->nb_list.wk_type != D_ALLOCDIRECT)
@@ -12511,27 +12921,28 @@
*/
if ((newblk->nb_state & DEPCOMPLETE) == 0) {
bp = newblk->nb_bmsafemap->sm_buf;
- bp = getdirtybuf(bp, &lk, MNT_WAIT);
+ bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT);
if (bp == NULL)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
error = bwrite(bp);
if (error)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
}
/*
* Write the buffer.
*/
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
BO_LOCK(bo);
bp = gbincore(bo, lbn);
if (bp != NULL) {
error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
- LK_INTERLOCK, BO_MTX(bo));
+ LK_INTERLOCK, BO_LOCKPTR(bo));
if (error == ENOLCK) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ error = 0;
continue; /* Slept, retry */
}
if (error != 0)
@@ -12553,7 +12964,7 @@
error = ffs_update(vp, 1);
if (error)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
return (error);
}
@@ -12576,8 +12987,11 @@
int error = 0;
struct buf *bp;
ino_t inum;
+ struct diraddhd unfinished;
+ LIST_INIT(&unfinished);
ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
restart:
while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
/*
@@ -12585,17 +12999,29 @@
* has a MKDIR_PARENT dependency.
*/
if (dap->da_state & MKDIR_PARENT) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = ffs_update(pvp, 1)) != 0)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If that cleared dependencies, go on to next.
*/
if (dap != LIST_FIRST(diraddhdp))
continue;
- if (dap->da_state & MKDIR_PARENT)
- panic("flush_pagedep_deps: MKDIR_PARENT");
+ /*
+ * All MKDIR_PARENT dependencies and all the
+ * NEWBLOCK pagedeps that are contained in direct
+ * blocks were resolved by doing above ffs_update.
+ * Pagedeps contained in indirect blocks may
+ * require a complete sync'ing of the directory.
+ * We are in the midst of doing a complete sync,
+ * so if they are not resolved in this pass we
+ * defer them for now as they will be sync'ed by
+ * our caller shortly.
+ */
+ LIST_REMOVE(dap, da_pdlist);
+ LIST_INSERT_HEAD(&unfinished, dap, da_pdlist);
+ continue;
}
/*
* A newly allocated directory must have its "." and
@@ -12617,7 +13043,7 @@
}
}
if (dap->da_state & MKDIR_BODY) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
FFSV_FORCEINSMQ)))
break;
@@ -12632,7 +13058,7 @@
vput(vp);
if (error != 0)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
/*
* If that cleared dependencies, go on to next.
*/
@@ -12665,13 +13091,13 @@
*/
if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) == 0) {
bp = inodedep->id_bmsafemap->sm_buf;
- bp = getdirtybuf(bp, &lk, MNT_WAIT);
+ bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT);
if (bp == NULL)
goto retry;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = bwrite(bp)) != 0)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (dap != LIST_FIRST(diraddhdp))
continue;
}
@@ -12681,7 +13107,7 @@
* adjusted update it here to flush it to disk.
*/
if (dap == LIST_FIRST(diraddhdp)) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
FFSV_FORCEINSMQ)))
break;
@@ -12689,7 +13115,7 @@
vput(vp);
if (error)
break;
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
* If we have failed to get rid of all the dependencies
@@ -12698,11 +13124,16 @@
if (dap == LIST_FIRST(diraddhdp)) {
inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep);
panic("flush_pagedep_deps: failed to flush "
- "inodedep %p ino %d dap %p", inodedep, inum, dap);
+ "inodedep %p ino %ju dap %p",
+ inodedep, (uintmax_t)inum, dap);
}
}
if (error)
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ while ((dap = LIST_FIRST(&unfinished)) != NULL) {
+ LIST_REMOVE(dap, da_pdlist);
+ LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
+ }
return (error);
}
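
The new unfinished list removes a panic: a diradd whose MKDIR_PARENT dependency lives in an indirect block cannot be resolved by ffs_update() alone, so instead of insisting on it (the old code panicked), the entry is parked and handed back to the caller, whose full directory sync will cover it. The park-and-requeue shape, condensed:

	LIST_REMOVE(dap, da_pdlist);
	LIST_INSERT_HEAD(&unfinished, dap, da_pdlist);
	/* ... after the main scan, return the work to its home list ... */
	while ((dap = LIST_FIRST(&unfinished)) != NULL) {
		LIST_REMOVE(dap, da_pdlist);
		LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
	}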
@@ -12721,28 +13152,54 @@
int jlow;
int max_softdeps_hard;
- ACQUIRE_LOCK(&lk);
+ KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
+ ("softdep_slowdown called on non-softdep filesystem"));
+ ump = VFSTOUFS(vp->v_mount);
+ ACQUIRE_LOCK(ump);
jlow = 0;
/*
* Check for journal space if needed.
*/
if (DOINGSUJ(vp)) {
- ump = VFSTOUFS(vp->v_mount);
if (journal_space(ump, 0) == 0)
jlow = 1;
}
+ /*
+ * If the system is under its limits and our filesystem is
+ * not responsible for more than our share of the usage and
+ * we are not low on journal space, then no need to slow down.
+ */
max_softdeps_hard = max_softdeps * 11 / 10;
if (dep_current[D_DIRREM] < max_softdeps_hard / 2 &&
dep_current[D_INODEDEP] < max_softdeps_hard &&
- VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
- dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0) {
- FREE_LOCK(&lk);
+ dep_current[D_INDIRDEP] < max_softdeps_hard / 1000 &&
+ dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0 &&
+ ump->softdep_curdeps[D_DIRREM] <
+ (max_softdeps_hard / 2) / stat_flush_threads &&
+ ump->softdep_curdeps[D_INODEDEP] <
+ max_softdeps_hard / stat_flush_threads &&
+ ump->softdep_curdeps[D_INDIRDEP] <
+ (max_softdeps_hard / 1000) / stat_flush_threads &&
+ ump->softdep_curdeps[D_FREEBLKS] <
+ max_softdeps_hard / stat_flush_threads) {
+ FREE_LOCK(ump);
return (0);
}
- if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps || jlow)
- softdep_speedup();
+ /*
+ * If the journal is low or our filesystem is over its limit
+ * then speedup the cleanup.
+ */
+ if (ump->softdep_curdeps[D_INDIRDEP] <
+ (max_softdeps_hard / 1000) / stat_flush_threads || jlow)
+ softdep_speedup(ump);
stat_sync_limit_hit += 1;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
+ /*
+ * We only slow down the rate at which new dependencies are
+ * generated if we are not using journaling. With journaling,
+ * the cleanup should always be sufficient to keep things
+ * under control.
+ */
if (DOINGSUJ(vp))
return (0);
return (1);
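
The slowdown test is now two-level: the old global ceilings remain, and each filesystem is additionally held to a 1/stat_flush_threads share so that one busy mount cannot consume the whole budget. Worked numbers under assumed settings (max_softdeps = 100000, four flush threads):

	max_softdeps_hard = max_softdeps * 11 / 10;	/* 110000 */
	/* global:  dep_current[D_INODEDEP]     must stay below 110000     */
	/* per-fs:  softdep_curdeps[D_INODEDEP] below 110000 / 4 = 27500   */
	/* indirdeps are far heavier: their cap is 110000 / 1000 = 110     */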
@@ -12795,19 +13252,18 @@
mtx_assert(UFS_MTX(ump), MA_OWNED);
UFS_UNLOCK(ump);
error = ffs_update(vp, 1);
- if (error != 0) {
+ if (error != 0 || MOUNTEDSOFTDEP(mp) == 0) {
UFS_LOCK(ump);
return (0);
}
/*
- * If we are in need of resources, consider pausing for
- * tickdelay to give ourselves some breathing room.
+ * If we are in need of resources, start by cleaning up
+ * any block removals associated with our inode.
*/
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
- request_cleanup(UFSTOVFS(ump), resource);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* Now clean up at least as many resources as we will need.
*
@@ -12850,12 +13306,12 @@
fs->fs_cstotal.cs_nbfree <= needed) ||
(resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
fs->fs_cstotal.cs_nifree <= needed)) {
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (ump->softdep_on_worklist > 0 &&
process_worklist_item(UFSTOVFS(ump),
ump->softdep_on_worklist, LK_NOWAIT) != 0)
stat_worklist_push += 1;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
* If we still need resources and there are no more worklist
@@ -12901,11 +13357,98 @@
return (1);
}
+static bool
+softdep_excess_items(struct ufsmount *ump, int item)
+{
+
+ KASSERT(item >= 0 && item < D_LAST, ("item %d", item));
+ return (dep_current[item] > max_softdeps &&
+ ump->softdep_curdeps[item] > max_softdeps /
+ stat_flush_threads);
+}
+
+static void
+schedule_cleanup(struct mount *mp)
+{
+ struct ufsmount *ump;
+ struct thread *td;
+
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ FREE_LOCK(ump);
+ td = curthread;
+ if ((td->td_pflags & TDP_KTHREAD) != 0 &&
+ (td->td_proc->p_flag2 & P2_AST_SU) == 0) {
+ /*
+ * No ast is delivered to kernel threads, so nobody
+ * would deref the mp. Some kernel threads
+ * explicitly check for AST, e.g. NFS daemon does
+ * this in the serving loop.
+ */
+ return;
+ }
+ if (td->td_su != NULL)
+ vfs_rel(td->td_su);
+ vfs_ref(mp);
+ td->td_su = mp;
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
+}
+
+static void
+softdep_ast_cleanup_proc(void)
+{
+ struct thread *td;
+ struct mount *mp;
+ struct ufsmount *ump;
+ int error;
+ bool req;
+
+ td = curthread;
+ while ((mp = td->td_su) != NULL) {
+ td->td_su = NULL;
+ error = vfs_busy(mp, MBF_NOWAIT);
+ vfs_rel(mp);
+ if (error != 0)
+ return;
+ if (ffs_own_mount(mp) && MOUNTEDSOFTDEP(mp)) {
+ ump = VFSTOUFS(mp);
+ for (;;) {
+ req = false;
+ ACQUIRE_LOCK(ump);
+ if (softdep_excess_items(ump, D_INODEDEP)) {
+ req = true;
+ request_cleanup(mp, FLUSH_INODES);
+ }
+ if (softdep_excess_items(ump, D_DIRREM)) {
+ req = true;
+ request_cleanup(mp, FLUSH_BLOCKS);
+ }
+ FREE_LOCK(ump);
+ if (softdep_excess_items(ump, D_NEWBLK) ||
+ softdep_excess_items(ump, D_ALLOCDIRECT) ||
+ softdep_excess_items(ump, D_ALLOCINDIR)) {
+ error = vn_start_write(NULL, &mp,
+ V_WAIT);
+ if (error == 0) {
+ req = true;
+ VFS_SYNC(mp, MNT_WAIT);
+ vn_finished_write(mp);
+ }
+ }
+ if ((td->td_pflags & TDP_KTHREAD) != 0 || !req)
+ break;
+ }
+ }
+ vfs_unbusy(mp);
+ }
+}
+
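schedule_cleanup() and softdep_ast_cleanup_proc() form a deferral pair: a thread that detects excess dependencies while deep in an allocation path cannot flush them there, so the mount is pinned on td_su and TDF_ASTPENDING is raised; the flush then runs in the thread's own context on its way back to user mode. The userret() hook that invokes the proc routine lives outside this diff and is assumed. The round trip, schematically:

	/* producer, with the per-filesystem lock held: */
	if (softdep_excess_items(ump, D_INODEDEP))
		schedule_cleanup(mp);	/* drops lock, refs mp, sets AST flag */

	/* consumer, on return to user mode: */
	softdep_ast_cleanup_proc();	/* vfs_busy, request_cleanup/VFS_SYNC */
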
/*
* If memory utilization has gotten too high, deliberately slow things
* down and speed up the I/O processing.
*/
-extern struct thread *syncertd;
static int
request_cleanup(mp, resource)
struct mount *mp;
@@ -12914,13 +13457,13 @@
struct thread *td = curthread;
struct ufsmount *ump;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
/*
* We never hold up the filesystem syncer or buf daemon.
*/
if (td->td_pflags & (TDP_SOFTDEP|TDP_NORUNNINGBUF))
return (0);
- ump = VFSTOUFS(mp);
/*
* First check to see if the work list has gotten backlogged.
* If it has, co-opt this process to help clean up two entries.
@@ -12940,7 +13483,7 @@
* Next, we attempt to speed up the syncer process. If that
* is successful, then we allow the process to continue.
*/
- if (softdep_speedup() &&
+ if (softdep_speedup(ump) &&
resource != FLUSH_BLOCKS_WAIT &&
resource != FLUSH_INODES_WAIT)
return(0);
@@ -12958,15 +13501,19 @@
case FLUSH_INODES:
case FLUSH_INODES_WAIT:
+ ACQUIRE_GBLLOCK(&lk);
stat_ino_limit_push += 1;
req_clear_inodedeps += 1;
+ FREE_GBLLOCK(&lk);
stat_countp = &stat_ino_limit_hit;
break;
case FLUSH_BLOCKS:
case FLUSH_BLOCKS_WAIT:
+ ACQUIRE_GBLLOCK(&lk);
stat_blk_limit_push += 1;
req_clear_remove += 1;
+ FREE_GBLLOCK(&lk);
stat_countp = &stat_blk_limit_hit;
break;
@@ -12977,19 +13524,25 @@
* Hopefully the syncer daemon will catch up and awaken us.
* We wait at most tickdelay before proceeding in any case.
*/
+ ACQUIRE_GBLLOCK(&lk);
+ FREE_LOCK(ump);
proc_waiting += 1;
if (callout_pending(&softdep_callout) == FALSE)
callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
pause_timer, 0);
- msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
+ if ((td->td_pflags & TDP_KTHREAD) == 0)
+ msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
proc_waiting -= 1;
+ FREE_GBLLOCK(&lk);
+ ACQUIRE_LOCK(ump);
return (1);
}
/*
* Awaken processes pausing in request_cleanup and clear proc_waiting
- * to indicate that there is no longer a timer running.
+ * to indicate that there is no longer a timer running. Pause_timer
+ * will be called with the global softdep mutex (&lk) locked.
*/
static void
pause_timer(arg)
@@ -12996,48 +13549,80 @@
void *arg;
{
+ GBLLOCK_OWNED(&lk);
/*
* The callout_ API has acquired mtx and will hold it around this
* function call.
*/
- *stat_countp += 1;
- wakeup_one(&proc_waiting);
- if (proc_waiting > 0)
- callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
- pause_timer, 0);
+ *stat_countp += proc_waiting;
+ wakeup(&proc_waiting);
}
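
pause_timer is simplified to match: it no longer re-arms itself or wakes a single thread; every throttled thread sleeps on proc_waiting under the global &lk and all are released together, the limit counter credited once per sleeper (kernel threads skip the sleep entirely). The waiter side, condensed from request_cleanup above:

	ACQUIRE_GBLLOCK(&lk);
	proc_waiting += 1;
	if (callout_pending(&softdep_callout) == FALSE)
		callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
		    pause_timer, 0);
	msleep(&proc_waiting, &lk, PPAUSE, "softupdate", 0);
	proc_waiting -= 1;
	FREE_GBLLOCK(&lk);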
/*
+ * If requested, try removing inode or removal dependencies.
+ */
+static void
+check_clear_deps(mp)
+ struct mount *mp;
+{
+
+ /*
+ * If we are suspended, it may be because of our using
+ * too many inodedeps, so help clear them out.
+ */
+ if (MOUNTEDSUJ(mp) && VFSTOUFS(mp)->softdep_jblocks->jb_suspended)
+ clear_inodedeps(mp);
+ /*
+ * General requests for cleanup of backed up dependencies
+ */
+ ACQUIRE_GBLLOCK(&lk);
+ if (req_clear_inodedeps) {
+ req_clear_inodedeps -= 1;
+ FREE_GBLLOCK(&lk);
+ clear_inodedeps(mp);
+ ACQUIRE_GBLLOCK(&lk);
+ wakeup(&proc_waiting);
+ }
+ if (req_clear_remove) {
+ req_clear_remove -= 1;
+ FREE_GBLLOCK(&lk);
+ clear_remove(mp);
+ ACQUIRE_GBLLOCK(&lk);
+ wakeup(&proc_waiting);
+ }
+ FREE_GBLLOCK(&lk);
+}
+
+/*
* Flush out a directory with at least one removal dependency in an effort to
* reduce the number of dirrem, freefile, and freeblks dependency structures.
*/
static void
-clear_remove(td)
- struct thread *td;
+clear_remove(mp)
+ struct mount *mp;
{
struct pagedep_hashhead *pagedephd;
struct pagedep *pagedep;
- static int next = 0;
- struct mount *mp;
+ struct ufsmount *ump;
struct vnode *vp;
struct bufobj *bo;
int error, cnt;
ino_t ino;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
- for (cnt = 0; cnt <= pagedep_hash; cnt++) {
- pagedephd = &pagedep_hashtbl[next++];
- if (next > pagedep_hash)
- next = 0;
+ for (cnt = 0; cnt <= ump->pagedep_hash_size; cnt++) {
+ pagedephd = &ump->pagedep_hashtbl[ump->pagedep_nextclean++];
+ if (ump->pagedep_nextclean > ump->pagedep_hash_size)
+ ump->pagedep_nextclean = 0;
LIST_FOREACH(pagedep, pagedephd, pd_hash) {
if (LIST_EMPTY(&pagedep->pd_dirremhd))
continue;
- mp = pagedep->pd_list.wk_mp;
ino = pagedep->pd_ino;
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
/*
* Let unmount clear deps
@@ -13061,7 +13646,7 @@
vput(vp);
finish_write:
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return;
}
}
@@ -13072,35 +13657,34 @@
* the number of inodedep dependency structures.
*/
static void
-clear_inodedeps(td)
- struct thread *td;
+clear_inodedeps(mp)
+ struct mount *mp;
{
struct inodedep_hashhead *inodedephd;
struct inodedep *inodedep;
- static int next = 0;
- struct mount *mp;
+ struct ufsmount *ump;
struct vnode *vp;
struct fs *fs;
int error, cnt;
ino_t firstino, lastino, ino;
- mtx_assert(&lk, MA_OWNED);
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ LOCK_OWNED(ump);
/*
* Pick a random inode dependency to be cleared.
* We will then gather up all the inodes in its block
* that have dependencies and flush them out.
*/
- for (cnt = 0; cnt <= inodedep_hash; cnt++) {
- inodedephd = &inodedep_hashtbl[next++];
- if (next > inodedep_hash)
- next = 0;
+ for (cnt = 0; cnt <= ump->inodedep_hash_size; cnt++) {
+ inodedephd = &ump->inodedep_hashtbl[ump->inodedep_nextclean++];
+ if (ump->inodedep_nextclean > ump->inodedep_hash_size)
+ ump->inodedep_nextclean = 0;
if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
break;
}
if (inodedep == NULL)
return;
- fs = inodedep->id_fs;
- mp = inodedep->id_list.wk_mp;
/*
* Find the last inode in the block with dependencies.
*/
@@ -13118,11 +13702,11 @@
continue;
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
continue;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
error = vfs_busy(mp, MBF_NOWAIT); /* Let unmount clear deps */
if (error != 0) {
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return;
}
if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp,
@@ -13130,7 +13714,7 @@
softdep_error("clear_inodedeps: vget", error);
vfs_unbusy(mp);
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
return;
}
vfs_unbusy(mp);
@@ -13146,7 +13730,7 @@
}
vput(vp);
vn_finished_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
}
@@ -13156,13 +13740,19 @@
struct workhead *wkhd;
{
struct worklist *wk;
+ struct ufsmount *ump;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(wkhd)) == NULL)
+ return;
+ KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
+ ("softdep_buf_append called on non-softdep filesystem"));
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(wkhd)) != NULL) {
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(&bp->b_dep, wk);
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
@@ -13176,10 +13766,13 @@
struct fs *fs;
int error;
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ip->i_ump)) != 0,
+ ("softdep_inode_append called on non-softdep filesystem"));
fs = ip->i_fs;
error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int)fs->fs_bsize, cred, &bp);
if (error) {
+ bqrelse(bp);
softdep_freework(wkhd);
return;
}
@@ -13191,10 +13784,17 @@
softdep_freework(wkhd)
struct workhead *wkhd;
{
+ struct worklist *wk;
+ struct ufsmount *ump;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(wkhd)) == NULL)
+ return;
+ KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
+ ("softdep_freework called on non-softdep filesystem"));
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
handle_jwork(wkhd);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -13208,6 +13808,7 @@
int wantcount;
{
struct worklist *wk;
+ struct ufsmount *ump;
struct bmsafemap *bmsafemap;
struct freework *freework;
struct inodedep *inodedep;
@@ -13222,7 +13823,10 @@
int i, retval;
retval = 0;
- ACQUIRE_LOCK(&lk);
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
+ return (0);
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
switch (wk->wk_type) {
@@ -13355,7 +13959,7 @@
}
}
out:
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return retval;
}
@@ -13365,42 +13969,41 @@
* Return acquired buffer or NULL on failure.
*/
static struct buf *
-getdirtybuf(bp, mtx, waitfor)
+getdirtybuf(bp, lock, waitfor)
struct buf *bp;
- struct mtx *mtx;
+ struct rwlock *lock;
int waitfor;
{
int error;
- mtx_assert(mtx, MA_OWNED);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) {
if (waitfor != MNT_WAIT)
return (NULL);
error = BUF_LOCK(bp,
- LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, mtx);
+ LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, lock);
/*
* Even if we successfully acquire bp here, we have dropped
- * mtx, which may violates our guarantee.
+ * lock, which may violate our guarantee.
*/
if (error == 0)
BUF_UNLOCK(bp);
else if (error != ENOLCK)
panic("getdirtybuf: inconsistent lock: %d", error);
- mtx_lock(mtx);
+ rw_wlock(lock);
return (NULL);
}
if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
- if (mtx == &lk && waitfor == MNT_WAIT) {
- mtx_unlock(mtx);
+ if (lock != BO_LOCKPTR(bp->b_bufobj) && waitfor == MNT_WAIT) {
+ rw_wunlock(lock);
BO_LOCK(bp->b_bufobj);
BUF_UNLOCK(bp);
if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
bp->b_vflags |= BV_BKGRDWAIT;
- msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj),
+ msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj),
PRIBIO | PDROP, "getbuf", 0);
} else
BO_UNLOCK(bp->b_bufobj);
- mtx_lock(mtx);
+ rw_wlock(lock);
return (NULL);
}
BUF_UNLOCK(bp);
@@ -13407,15 +14010,15 @@
if (waitfor != MNT_WAIT)
return (NULL);
/*
- * The mtx argument must be bp->b_vp's mutex in
+ * The lock argument must be bp->b_vp's mutex in
* this case.
*/
#ifdef DEBUG_VFS_LOCKS
if (bp->b_vp->v_type != VCHR)
- ASSERT_BO_LOCKED(bp->b_bufobj);
+ ASSERT_BO_WLOCKED(bp->b_bufobj);
#endif
bp->b_vflags |= BV_BKGRDWAIT;
- msleep(&bp->b_xflags, mtx, PRIBIO, "getbuf", 0);
+ rw_sleep(&bp->b_xflags, lock, PRIBIO, "getbuf", 0);
return (NULL);
}
if ((bp->b_flags & B_DELWRI) == 0) {
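
getdirtybuf's interlock parameter changes from struct mtx * to struct rwlock * so that both the per-filesystem lock (LOCK_PTR(ump)) and a bufobj lock (BO_LOCKPTR(bo)) can be passed. The contract is unchanged and worth restating: a NULL return means the interlock may have been dropped while sleeping, but it is always re-acquired before return, so callers only need to re-validate state. A typical caller, as seen throughout these hunks:

	bp = getdirtybuf(bmsafemap->sm_buf, LOCK_PTR(ump), MNT_WAIT);
	if (bp == NULL)
		goto retry;	/* slept; dependency state may have changed */
	FREE_LOCK(ump);
	error = bwrite(bp);
	ACQUIRE_LOCK(ump);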
@@ -13436,30 +14039,65 @@
int
softdep_check_suspend(struct mount *mp,
struct vnode *devvp,
- int softdep_deps,
- int softdep_accdeps,
+ int softdep_depcnt,
+ int softdep_accdepcnt,
int secondary_writes,
int secondary_accwrites)
{
struct bufobj *bo;
struct ufsmount *ump;
- int error;
+ struct inodedep *inodedep;
+ int error, unlinked;
- ump = VFSTOUFS(mp);
bo = &devvp->v_bufobj;
- ASSERT_BO_LOCKED(bo);
+ ASSERT_BO_WLOCKED(bo);
+ /*
+ * If we are not running with soft updates, then we need only
+ * deal with secondary writes as we try to suspend.
+ */
+ if (MOUNTEDSOFTDEP(mp) == 0) {
+ MNT_ILOCK(mp);
+ while (mp->mnt_secondary_writes != 0) {
+ BO_UNLOCK(bo);
+ msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
+ (PUSER - 1) | PDROP, "secwr", 0);
+ BO_LOCK(bo);
+ MNT_ILOCK(mp);
+ }
+
+ /*
+ * Reasons for needing more work before suspend:
+ * - Dirty buffers on devvp.
+ * - Secondary writes occurred after start of vnode sync loop
+ */
+ error = 0;
+ if (bo->bo_numoutput > 0 ||
+ bo->bo_dirty.bv_cnt > 0 ||
+ secondary_writes != 0 ||
+ mp->mnt_secondary_writes != 0 ||
+ secondary_accwrites != mp->mnt_secondary_accwrites)
+ error = EAGAIN;
+ BO_UNLOCK(bo);
+ return (error);
+ }
+
+ /*
+ * If we are running with soft updates, then we need to coordinate
+ * with them as we try to suspend.
+ */
+ ump = VFSTOUFS(mp);
for (;;) {
- if (!TRY_ACQUIRE_LOCK(&lk)) {
+ if (!TRY_ACQUIRE_LOCK(ump)) {
BO_UNLOCK(bo);
- ACQUIRE_LOCK(&lk);
- FREE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
+ FREE_LOCK(ump);
BO_LOCK(bo);
continue;
}
MNT_ILOCK(mp);
if (mp->mnt_secondary_writes != 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes,
MNT_MTX(mp),
@@ -13470,6 +14108,20 @@
break;
}
+ unlinked = 0;
+ if (MOUNTEDSUJ(mp)) {
+ for (inodedep = TAILQ_FIRST(&ump->softdep_unlinked);
+ inodedep != NULL;
+ inodedep = TAILQ_NEXT(inodedep, id_unlinked)) {
+ if ((inodedep->id_state & (UNLINKED | UNLINKLINKS |
+ UNLINKONLIST)) != (UNLINKED | UNLINKLINKS |
+ UNLINKONLIST) ||
+ !check_inodedep_free(inodedep))
+ continue;
+ unlinked++;
+ }
+ }
+
/*
* Reasons for needing more work before suspend:
* - Dirty buffers on devvp.
@@ -13479,14 +14131,14 @@
error = 0;
if (bo->bo_numoutput > 0 ||
bo->bo_dirty.bv_cnt > 0 ||
- softdep_deps != 0 ||
- ump->softdep_deps != 0 ||
- softdep_accdeps != ump->softdep_accdeps ||
+ softdep_depcnt != unlinked ||
+ ump->softdep_deps != unlinked ||
+ softdep_accdepcnt != ump->softdep_accdeps ||
secondary_writes != 0 ||
mp->mnt_secondary_writes != 0 ||
secondary_accwrites != mp->mnt_secondary_accwrites)
error = EAGAIN;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
BO_UNLOCK(bo);
return (error);
}
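
The suspension check now tolerates exactly one class of residual dependency: inodes that are fully unlinked (UNLINKED | UNLINKLINKS | UNLINKONLIST) and freeable. With assumed counts, if three such inodedeps remain then unlinked == 3, and suspension proceeds only when both dependency counters agree with it:

	/* 3 unlinked-only inodedeps: counts match, suspension may proceed */
	if (softdep_depcnt != unlinked || ump->softdep_deps != unlinked)
		error = EAGAIN;		/* anything else outstanding: retry */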
@@ -13504,11 +14156,16 @@
{
struct ufsmount *ump;
+ if (MOUNTEDSOFTDEP(mp) == 0) {
+ *softdep_depsp = 0;
+ *softdep_accdepsp = 0;
+ return;
+ }
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
*softdep_depsp = ump->softdep_deps;
*softdep_accdepsp = ump->softdep_accdeps;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
/*
@@ -13525,12 +14182,12 @@
bo = &vp->v_bufobj;
ASSERT_VOP_LOCKED(vp, "drain_output");
- ASSERT_BO_LOCKED(bo);
+ ASSERT_BO_WLOCKED(bo);
while (bo->bo_numoutput) {
bo->bo_flag |= BO_WWAIT;
msleep((caddr_t)&bo->bo_numoutput,
- BO_MTX(bo), PRIBIO + 1, "drainvp", 0);
+ BO_LOCKPTR(bo), PRIBIO + 1, "drainvp", 0);
}
}
@@ -13546,8 +14203,13 @@
if ((bp->b_ioflags & BIO_ERROR) == 0)
panic("softdep_deallocate_dependencies: dangling deps");
- softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
- panic("softdep_deallocate_dependencies: unrecovered I/O error");
+ if (bp->b_vp != NULL && bp->b_vp->v_mount != NULL)
+ softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
+ else
+ printf("softdep_deallocate_dependencies: "
+ "got error %d while accessing filesystem\n", bp->b_error);
+ if (bp->b_error != ENXIO)
+ panic("softdep_deallocate_dependencies: unrecovered I/O error");
}
/*
@@ -13608,15 +14270,17 @@
{
struct inodedep_hashhead *inodedephd;
struct inodedep *inodedep;
- struct fs *fs;
+ struct ufsmount *ump;
int cnt;
- fs = have_addr ? (struct fs *)addr : NULL;
- for (cnt = 0; cnt < inodedep_hash; cnt++) {
- inodedephd = &inodedep_hashtbl[cnt];
+ if (have_addr == 0) {
+ db_printf("Address required\n");
+ return;
+ }
+ ump = (struct ufsmount *)addr;
+ for (cnt = 0; cnt < ump->inodedep_hash_size; cnt++) {
+ inodedephd = &ump->inodedep_hashtbl[cnt];
LIST_FOREACH(inodedep, inodedephd, id_hash) {
- if (fs != NULL && fs != inodedep->id_fs)
- continue;
inodedep_print(inodedep, 0);
}
}
@@ -13658,11 +14322,17 @@
DB_SHOW_COMMAND(mkdirs, db_show_mkdirs)
{
+ struct mkdirlist *mkdirlisthd;
struct jaddref *jaddref;
struct diradd *diradd;
struct mkdir *mkdir;
- LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) {
+ if (have_addr == 0) {
+ db_printf("Address required\n");
+ return;
+ }
+ mkdirlisthd = (struct mkdirlist *)addr;
+ LIST_FOREACH(mkdir, mkdirlisthd, md_mkdirs) {
diradd = mkdir->md_diradd;
db_printf("mkdir: %p state 0x%X dap %p state 0x%X",
mkdir, mkdir->md_state, diradd, diradd->da_state);
@@ -13673,6 +14343,17 @@
}
}
+/* exported to ffs_vfsops.c */
+extern void db_print_ffs(struct ufsmount *ump);
+void
+db_print_ffs(struct ufsmount *ump)
+{
+ db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n",
+ ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
+ ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
+ ump->softdep_deps, ump->softdep_req);
+}
+
#endif /* DDB */
#endif /* SOFTUPDATES */
Modified: trunk/sys/ufs/ffs/ffs_subr.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_subr.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_subr.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_subr.c 207141 2010-04-24 07:05:35Z jeff $");
#include <sys/param.h>
Modified: trunk/sys/ufs/ffs/ffs_suspend.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_suspend.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_suspend.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -27,11 +27,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/ffs_suspend.c 306175 2016-09-22 10:42:40Z kib $
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_suspend.c 306175 2016-09-22 10:42:40Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -177,7 +177,6 @@
static int
ffs_susp_suspend(struct mount *mp)
{
- struct fs *fs;
struct ufsmount *ump;
int error;
@@ -189,7 +188,6 @@
return (EBUSY);
ump = VFSTOUFS(mp);
- fs = ump->um_fs;
/*
* Make sure the calling thread is permitted to access the mounted
@@ -207,7 +205,7 @@
return (EPERM);
#endif
- if ((error = vfs_write_suspend(mp)) != 0)
+ if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0)
return (error);
ump->um_writesuspended = 1;
@@ -237,7 +235,7 @@
KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0,
("MNTK_SUSPEND not set"));
- error = ffs_reload(mp, curthread, 1);
+ error = ffs_reload(mp, curthread, FFSR_FORCE | FFSR_UNSUSPEND);
if (error != 0)
panic("failed to unsuspend writes on %s", fs->fs_fsmnt);
@@ -253,7 +251,7 @@
*/
mp->mnt_susp_owner = curthread;
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
vfs_unbusy(mp);
ump->um_writesuspended = 0;
Modified: trunk/sys/ufs/ffs/ffs_tables.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_tables.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_tables.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_tables.c 139825 2005-01-07 02:29:27Z imp $");
#include <sys/param.h>
#include <ufs/ufs/dinode.h>
Modified: trunk/sys/ufs/ffs/ffs_vfsops.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_vfsops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_vfsops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_vfsops.c 309208 2016-11-27 09:14:52Z kib $");
#include "opt_quota.h"
#include "opt_ufs.h"
@@ -42,6 +43,7 @@
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/taskqueue.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
@@ -52,6 +54,7 @@
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/rwlock.h>
#include <security/mac/mac_framework.h>
@@ -142,10 +145,10 @@
{
struct vnode *devvp;
struct thread *td;
- struct ufsmount *ump = 0;
+ struct ufsmount *ump = NULL;
struct fs *fs;
pid_t fsckpid = 0;
- int error, flags;
+ int error, error1, flags;
uint64_t mntorflags;
accmode_t accmode;
struct nameidata ndp;
@@ -254,31 +257,9 @@
*/
if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
return (error);
- for (;;) {
- vn_finished_write(mp);
- if ((error = vfs_write_suspend(mp)) != 0)
- return (error);
- MNT_ILOCK(mp);
- if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
- /*
- * Allow the secondary writes
- * to proceed.
- */
- mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
- MNTK_SUSPEND2);
- wakeup(&mp->mnt_flag);
- MNT_IUNLOCK(mp);
- /*
- * Allow the curthread to
- * ignore the suspension to
- * synchronize on-disk state.
- */
- td->td_pflags |= TDP_IGNSUSP;
- break;
- }
- MNT_IUNLOCK(mp);
- vn_start_write(NULL, &mp, V_WAIT);
- }
+ error = vfs_write_suspend_umnt(mp);
+ if (error != 0)
+ return (error);
/*
* Check for and optionally get rid of files open
* for writing.
@@ -292,7 +273,7 @@
error = ffs_flushfiles(mp, flags, td);
}
if (error) {
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
return (error);
}
if (fs->fs_pendingblocks != 0 ||
@@ -309,7 +290,7 @@
if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
fs->fs_ronly = 0;
fs->fs_clean = 0;
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
return (error);
}
if (MOUNTEDSOFTDEP(mp))
@@ -330,7 +311,7 @@
* Allow the writers to note that filesystem
* is ro now.
*/
- vfs_write_resume(mp);
+ vfs_write_resume(mp, 0);
}
if ((mp->mnt_flag & MNT_RELOAD) &&
(error = ffs_reload(mp, td, 0)) != 0)
@@ -481,6 +462,11 @@
*/
if (mp->mnt_flag & MNT_SNAPSHOT)
return (ffs_snapshot(mp, fspec));
+
+ /*
+ * Must not call namei() while owning busy ref.
+ */
+ vfs_unbusy(mp);
}
/*
@@ -488,7 +474,18 @@
* and verify that it refers to a sensible disk device.
*/
NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
- if ((error = namei(&ndp)) != 0)
+ error = namei(&ndp);
+ if ((mp->mnt_flag & MNT_UPDATE) != 0) {
+ /*
+ * Unmount does not start if MNT_UPDATE is set. Mount
+ * update busies mp before setting MNT_UPDATE. We
+ * must be able to retain our busy ref succesfully,
+ * without sleep.
+ */
+ error1 = vfs_busy(mp, MBF_NOWAIT);
+ MPASS(error1 == 0);
+ }
+ if (error != 0)
return (error);
NDFREE(&ndp, NDF_ONLY_PNBUF);
devvp = ndp.ni_vp;
@@ -532,7 +529,7 @@
* We need the name for the mount point (also used for
* "last mounted on") copied in. If an error occurs,
* the mount point is discarded by the upper level code.
- * Note that vfs_mount() populates f_mntonname for us.
+ * Note that vfs_mount_alloc() populates f_mntonname for us.
*/
if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
vrele(devvp);
@@ -603,11 +600,13 @@
* 2) re-read superblock from disk.
* 3) re-read summary information from disk.
* 4) invalidate all inactive vnodes.
- * 5) invalidate all cached file data.
- * 6) re-read inode data for all active vnodes.
+ * 5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
+ * writers, if requested.
+ * 6) invalidate all cached file data.
+ * 7) re-read inode data for all active vnodes.
*/
int
-ffs_reload(struct mount *mp, struct thread *td, int force)
+ffs_reload(struct mount *mp, struct thread *td, int flags)
{
struct vnode *vp, *mvp, *devvp;
struct inode *ip;
@@ -616,13 +615,14 @@
struct fs *fs, *newfs;
struct ufsmount *ump;
ufs2_daddr_t sblockloc;
- int i, blks, size, error;
+ int i, blks, error;
+ u_long size;
int32_t *lp;
ump = VFSTOUFS(mp);
MNT_ILOCK(mp);
- if ((mp->mnt_flag & MNT_RDONLY) == 0 && force == 0) {
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
MNT_IUNLOCK(mp);
return (EINVAL);
}
@@ -686,7 +686,7 @@
size += fs->fs_ncg * sizeof(int32_t);
size += fs->fs_ncg * sizeof(u_int8_t);
free(fs->fs_csp, M_UFSMNT);
- space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
+ space = malloc(size, M_UFSMNT, M_WAITOK);
fs->fs_csp = space;
for (i = 0; i < blks; i += fs->fs_frag) {
size = fs->fs_bsize;
@@ -712,6 +712,12 @@
size = fs->fs_ncg * sizeof(u_int8_t);
fs->fs_contigdirs = (u_int8_t *)space;
bzero(fs->fs_contigdirs, size);
+ if ((flags & FFSR_UNSUSPEND) != 0) {
+ MNT_ILOCK(mp);
+ mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
+ wakeup(&mp->mnt_flag);
+ MNT_IUNLOCK(mp);
+ }
loop:
MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
@@ -773,7 +779,8 @@
struct cdev *dev;
void *space;
ufs2_daddr_t sblockloc;
- int error, i, blks, size, ronly;
+ int error, i, blks, len, ronly;
+ u_long size;
int32_t *lp;
struct ucred *cred;
struct g_consumer *cp;
@@ -784,23 +791,31 @@
cred = td ? td->td_ucred : NOCRED;
ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+ KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
dev = devvp->v_rdev;
- dev_ref(dev);
+ if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
+ (uintptr_t)mp) == 0) {
+ VOP_UNLOCK(devvp, 0);
+ return (EBUSY);
+ }
DROP_GIANT();
g_topology_lock();
error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
g_topology_unlock();
PICKUP_GIANT();
+ if (error != 0) {
+ atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
+ VOP_UNLOCK(devvp, 0);
+ return (error);
+ }
+ dev_ref(dev);
+ devvp->v_bufobj.bo_ops = &ffs_ops;
VOP_UNLOCK(devvp, 0);
- if (error)
- goto out;
- if (devvp->v_rdev->si_iosize_max != 0)
- mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
+ if (dev->si_iosize_max != 0)
+ mp->mnt_iosize_max = dev->si_iosize_max;
if (mp->mnt_iosize_max > MAXPHYS)
mp->mnt_iosize_max = MAXPHYS;
- devvp->v_bufobj.bo_ops = &ffs_ops;
-
fs = NULL;
sblockloc = 0;
/*
@@ -872,11 +887,11 @@
/*
* Get journal provider name.
*/
- size = 1024;
- mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
- if (g_io_getattr("GJOURNAL::provider", cp, &size,
+ len = 1024;
+ mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK);
+ if (g_io_getattr("GJOURNAL::provider", cp, &len,
mp->mnt_gjprovider) == 0) {
- mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
+ mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
M_UFSMNT, M_WAITOK);
MNT_ILOCK(mp);
mp->mnt_flag |= MNT_GJOURNAL;
@@ -928,7 +943,7 @@
if (fs->fs_contigsumsize > 0)
size += fs->fs_ncg * sizeof(int32_t);
size += fs->fs_ncg * sizeof(u_int8_t);
- space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
+ space = malloc(size, M_UFSMNT, M_WAITOK);
fs->fs_csp = space;
for (i = 0; i < blks; i += fs->fs_frag) {
size = fs->fs_bsize;
@@ -1013,8 +1028,8 @@
#endif
}
if ((fs->fs_flags & FS_TRIM) != 0) {
- size = sizeof(int);
- if (g_io_getattr("GEOM::candelete", cp, &size,
+ len = sizeof(int);
+ if (g_io_getattr("GEOM::candelete", cp, &len,
&ump->um_candelete) == 0) {
if (!ump->um_candelete)
printf("WARNING: %s: TRIM flag on fs but disk "
@@ -1026,6 +1041,12 @@
mp->mnt_stat.f_mntonname);
ump->um_candelete = 0;
}
+ if (ump->um_candelete) {
+ ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
+ taskqueue_thread_enqueue, &ump->um_trim_tq);
+ taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
+ "%s trim", mp->mnt_stat.f_mntonname);
+ }
}
ump->um_mountp = mp;
@@ -1063,8 +1084,6 @@
ffs_flushfiles(mp, FORCECLOSE, td);
goto out;
}
- if (devvp->v_type == VCHR && devvp->v_rdev != NULL)
- devvp->v_rdev->si_mountpt = mp;
if (fs->fs_snapinum[0] != 0)
ffs_snapshot_mount(mp);
fs->fs_fmod = 1;
@@ -1072,11 +1091,11 @@
(void) ffs_sbupdate(ump, MNT_WAIT, 0);
}
/*
- * Initialize filesystem stat information in mount struct.
+ * Initialize filesystem state information in mount struct.
*/
MNT_ILOCK(mp);
- mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
- MNTK_EXTENDED_SHARED | MNTK_NO_IOPF;
+ mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
+ MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
@@ -1114,6 +1133,7 @@
free(ump, M_UFSMNT);
mp->mnt_data = NULL;
}
+ atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
dev_rel(dev);
return (error);
}
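
The hunks above also change how the mount takes ownership of the device: instead of assigning si_mountpt after a successful mount, ffs_mountfs() now claims it up front with an atomic compare-and-set, so a second mount of the same device fails cleanly with EBUSY. A minimal sketch of the claim/release protocol, condensed from the changes above:

    /* Claim the device for mount mp; fail if another mount owns it. */
    if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
        (uintptr_t)mp) == 0)
            return (EBUSY);

    /* ... mount proceeds ... */

    /* On any failure path, and again at unmount, release the claim. */
    atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);

The acquire/release pairing ensures that whoever wins the compare-and-set also observes a device free of the previous mount's stores.
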
@@ -1234,7 +1254,7 @@
susp = 0;
if (mntflags & MNT_FORCE) {
flags |= FORCECLOSE;
- susp = fs->fs_ronly != 0;
+ susp = fs->fs_ronly == 0;
}
#ifdef UFS_EXTATTR
if ((error = ufs_extattr_stop(mp, td))) {
@@ -1249,25 +1269,9 @@
}
#endif
if (susp) {
- /*
- * dounmount already called vn_start_write().
- */
- for (;;) {
- vn_finished_write(mp);
- if ((error = vfs_write_suspend(mp)) != 0)
- return (error);
- MNT_ILOCK(mp);
- if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
- mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
- MNTK_SUSPEND2);
- wakeup(&mp->mnt_flag);
- MNT_IUNLOCK(mp);
- td->td_pflags |= TDP_IGNSUSP;
- break;
- }
- MNT_IUNLOCK(mp);
- vn_start_write(NULL, &mp, V_WAIT);
- }
+ error = vfs_write_suspend_umnt(mp);
+ if (error != 0)
+ goto fail1;
}
if (MOUNTEDSOFTDEP(mp))
error = softdep_flushfiles(mp, flags, td);
@@ -1285,7 +1289,8 @@
fs->fs_pendinginodes = 0;
}
UFS_UNLOCK(ump);
- softdep_unmount(mp);
+ if (MOUNTEDSOFTDEP(mp))
+ softdep_unmount(mp);
if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
error = ffs_sbupdate(ump, MNT_WAIT, 0);
@@ -1294,9 +1299,13 @@
goto fail;
}
}
- if (susp) {
- vfs_write_resume(mp);
- vn_start_write(NULL, &mp, V_WAIT);
+ if (susp)
+ vfs_write_resume(mp, VR_START_WRITE);
+ if (ump->um_trim_tq != NULL) {
+ while (ump->um_trim_inflight != 0)
+ pause("ufsutr", hz);
+ taskqueue_drain_all(ump->um_trim_tq);
+ taskqueue_free(ump->um_trim_tq);
}
DROP_GIANT();
g_topology_lock();
@@ -1310,8 +1319,7 @@
g_vfs_close(ump->um_cp);
g_topology_unlock();
PICKUP_GIANT();
- if (ump->um_devvp->v_type == VCHR && ump->um_devvp->v_rdev != NULL)
- ump->um_devvp->v_rdev->si_mountpt = NULL;
+ atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
vrele(ump->um_devvp);
dev_rel(ump->um_dev);
mtx_destroy(UFS_MTX(ump));
@@ -1329,10 +1337,9 @@
return (error);
fail:
- if (susp) {
- vfs_write_resume(mp);
- vn_start_write(NULL, &mp, V_WAIT);
- }
+ if (susp)
+ vfs_write_resume(mp, VR_START_WRITE);
+fail1:
#ifdef UFS_EXTATTR
if (e_restart) {
ufs_extattr_uepm_init(&ump->um_extattr);
@@ -1448,6 +1455,14 @@
return (0);
}
+static bool
+sync_doupdate(struct inode *ip)
+{
+
+ return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
+ IN_UPDATE)) != 0);
+}
+
/*
* For a lazy sync, we only care about access times, quotas and the
* superblock. Other filesystem changes are already converted to
@@ -1481,8 +1496,7 @@
* Test also all the other timestamp flags too, to pick up
* any other cases that could be missed.
*/
- if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
- IN_UPDATE)) == 0) {
+ if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
VI_UNLOCK(vp);
continue;
}
@@ -1489,7 +1503,8 @@
if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
td)) != 0)
continue;
- error = ffs_update(vp, 0);
+ if (sync_doupdate(ip))
+ error = ffs_update(vp, 0);
if (error != 0)
allerror = error;
vput(vp);
@@ -1524,7 +1539,7 @@
struct inode *ip;
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs;
- int error, count, wait, lockreq, allerror = 0;
+ int error, count, lockreq, allerror = 0;
int suspend;
int suspended;
int secondary_writes;
@@ -1533,7 +1548,6 @@
int softdep_accdeps;
struct bufobj *bo;
- wait = 0;
suspend = 0;
suspended = 0;
td = curthread;
@@ -1541,8 +1555,11 @@
if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
panic("%s: ffs_sync: modification on read-only filesystem",
fs->fs_fsmnt);
- if (waitfor == MNT_LAZY)
- return (ffs_sync_lazy(mp));
+ if (waitfor == MNT_LAZY) {
+ if (!rebooting)
+ return (ffs_sync_lazy(mp));
+ waitfor = MNT_NOWAIT;
+ }
/*
* Write back each (modified) inode.
@@ -1552,10 +1569,8 @@
suspend = 1;
waitfor = MNT_WAIT;
}
- if (waitfor == MNT_WAIT) {
- wait = 1;
+ if (waitfor == MNT_WAIT)
lockreq = LK_EXCLUSIVE;
- }
lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
loop:
/* Grab snapshot of secondary write counts */
@@ -1599,7 +1614,7 @@
/*
* Force stale filesystem control information to be flushed.
*/
- if (waitfor == MNT_WAIT) {
+ if (waitfor == MNT_WAIT || rebooting) {
if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
allerror = error;
/* Flushed work items may create new vnodes to clean */
@@ -1616,9 +1631,12 @@
if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
BO_UNLOCK(bo);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
- if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
+ error = VOP_FSYNC(devvp, waitfor, td);
+ VOP_UNLOCK(devvp, 0);
+ if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
+ error = ffs_sbupdate(ump, waitfor, 0);
+ if (error != 0)
allerror = error;
- VOP_UNLOCK(devvp, 0);
if (allerror == 0 && waitfor == MNT_WAIT)
goto loop;
} else if (suspend != 0) {
@@ -1679,7 +1697,7 @@
/*
* We must promote to an exclusive lock for vnode creation. This
* can happen if lookup is passed LOCKSHARED.
- */
+ */
if ((flags & LK_TYPE_MASK) == LK_SHARED) {
flags &= ~LK_TYPE_MASK;
flags |= LK_EXCLUSIVE;
@@ -1695,21 +1713,11 @@
ump = VFSTOUFS(mp);
dev = ump->um_dev;
fs = ump->um_fs;
-
- /*
- * If this malloc() is performed after the getnewvnode()
- * it might block, leaving a vnode with a NULL v_data to be
- * found by ffs_sync() if a sync happens to fire right then,
- * which will cause a panic because ffs_sync() blindly
- * dereferences vp->v_data (as well it should).
- */
ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
/* Allocate a new vnode/inode. */
- if (fs->fs_magic == FS_UFS1_MAGIC)
- error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
- else
- error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
+ error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
+ &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
if (error) {
*vpp = NULL;
uma_zfree(uma_inode, ip);
@@ -1728,6 +1736,7 @@
ip->i_dev = dev;
ip->i_number = ino;
ip->i_ea_refs = 0;
+ ip->i_nextclustercg = -1;
#ifdef QUOTA
{
int i;
@@ -1954,7 +1963,7 @@
}
fs->fs_fmod = 0;
fs->fs_time = time_second;
- if (fs->fs_flags & FS_DOSOFTDEP)
+ if (MOUNTEDSOFTDEP(ump->um_mountp))
softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
@@ -2012,13 +2021,19 @@
BO_LOCK(bufobj);
if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
- /* Grab an extra reference to be dropped by the bufdone() below. */
- bufobj_wrefl(bufobj);
+
+ /*
+ * We should mark the cylinder group buffer origbp as
+	 * dirty, so as not to lose the failed write.
+ */
+ if ((bp->b_ioflags & BIO_ERROR) != 0)
+ origbp->b_vflags |= BV_BKGRDERR;
BO_UNLOCK(bufobj);
/*
* Process dependencies then return any unfinished ones.
*/
- if (!LIST_EMPTY(&bp->b_dep))
+ pbrelvp(bp);
+ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
buf_complete(bp);
#ifdef SOFTUPDATES
if (!LIST_EMPTY(&bp->b_dep))
@@ -2030,6 +2045,15 @@
*/
bp->b_flags |= B_NOCACHE;
bp->b_flags &= ~B_CACHE;
+
+ /*
+	 * Prevent brelse() from trying to keep and re-dirty bp on
+ * errors. It causes b_bufobj dereference in
+ * bdirty()/reassignbuf(), and b_bufobj was cleared in
+ * pbrelvp() above.
+ */
+ if ((bp->b_ioflags & BIO_ERROR) != 0)
+ bp->b_flags |= B_INVAL;
bufdone(bp);
BO_LOCK(bufobj);
/*
@@ -2063,7 +2087,6 @@
static int
ffs_bufwrite(struct buf *bp)
{
- int oldflags, s;
struct buf *newbp;
CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
@@ -2072,11 +2095,8 @@
return (0);
}
- oldflags = bp->b_flags;
-
if (!BUF_ISLOCKED(bp))
panic("bufwrite: buffer is not busy???");
- s = splbio();
/*
* If a background write is already in progress, delay
* writing this block if it is asynchronous. Otherwise
@@ -2086,15 +2106,16 @@
if (bp->b_vflags & BV_BKGRDINPROG) {
if (bp->b_flags & B_ASYNC) {
BO_UNLOCK(bp->b_bufobj);
- splx(s);
bdwrite(bp);
return (0);
}
bp->b_vflags |= BV_BKGRDWAIT;
- msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
+ msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
+ "bwrbg", 0);
if (bp->b_vflags & BV_BKGRDINPROG)
panic("bufwrite: still writing");
}
+ bp->b_vflags &= ~BV_BKGRDERR;
BO_UNLOCK(bp->b_bufobj);
/*
@@ -2117,24 +2138,19 @@
if (newbp == NULL)
goto normal_write;
- /*
- * set it to be identical to the old block. We have to
- * set b_lblkno and BKGRDMARKER before calling bgetvp()
- * to avoid confusing the splay tree and gbincore().
- */
+ KASSERT((bp->b_flags & B_UNMAPPED) == 0, ("Unmapped cg"));
memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
- newbp->b_lblkno = bp->b_lblkno;
- newbp->b_xflags |= BX_BKGRDMARKER;
BO_LOCK(bp->b_bufobj);
bp->b_vflags |= BV_BKGRDINPROG;
- bgetvp(bp->b_vp, newbp);
BO_UNLOCK(bp->b_bufobj);
- newbp->b_bufobj = &bp->b_vp->v_bufobj;
+ newbp->b_xflags |= BX_BKGRDMARKER;
+ newbp->b_lblkno = bp->b_lblkno;
newbp->b_blkno = bp->b_blkno;
newbp->b_offset = bp->b_offset;
newbp->b_iodone = ffs_backgroundwritedone;
newbp->b_flags |= B_ASYNC;
newbp->b_flags &= ~B_INVAL;
+ pbgetvp(bp->b_vp, newbp);
#ifdef SOFTUPDATES
/*
@@ -2150,12 +2166,9 @@
#endif
/*
- * Initiate write on the copy, release the original to
- * the B_LOCKED queue so that it cannot go away until
- * the background write completes. If not locked it could go
- * away and then be reconstituted while it was being written.
- * If the reconstituted buffer were written, we could end up
- * with two background copies being written at the same time.
+ * Initiate write on the copy, release the original. The
+ * BKGRDINPROG flag prevents it from going away until
+ * the background write completes.
*/
bqrelse(bp);
bp = newbp;
@@ -2241,15 +2254,10 @@
}
#ifdef DDB
+#ifdef SOFTUPDATES
-static void
-db_print_ffs(struct ufsmount *ump)
-{
- db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n",
- ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
- ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
- ump->softdep_deps, ump->softdep_req);
-}
+/* defined in ffs_softdep.c */
+extern void db_print_ffs(struct ufsmount *ump);
DB_SHOW_COMMAND(ffs, db_show_ffs)
{
@@ -2268,4 +2276,5 @@
}
}
+#endif /* SOFTUPDATES */
#endif /* DDB */
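
Taken together, the ffs_vfsops.c changes above give TRIM its own single-threaded taskqueue: created in ffs_mountfs() once GEOM::candelete confirms the device supports BIO_DELETE, and drained and freed in ffs_unmount() after the last trim completes. A sketch of the lifecycle using the names from the patch (um_trim_tq, um_trim_inflight); the enqueue side lives in ffs_alloc.c, which is not part of these hunks, so the task setup shown in the middle is a hypothetical illustration:

    /* Mount: one kthread servicing deferred BIO_DELETE requests. */
    ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
        taskqueue_thread_enqueue, &ump->um_trim_tq);
    taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
        "%s trim", mp->mnt_stat.f_mntonname);

    /* Block free (ffs_alloc.c, not shown): hand the trim to the
     * queue instead of issuing it synchronously.  ffs_do_trim and
     * its context ctx are hypothetical names. */
    atomic_add_int(&ump->um_trim_inflight, 1);
    TASK_INIT(&ctx->task, 0, ffs_do_trim, ctx);
    taskqueue_enqueue(ump->um_trim_tq, &ctx->task);

    /* Unmount: wait out in-flight trims, then drain and free. */
    while (ump->um_trim_inflight != 0)
        pause("ufsutr", hz);
    taskqueue_drain_all(ump->um_trim_tq);
    taskqueue_free(ump->um_trim_tq);
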
Modified: trunk/sys/ufs/ffs/ffs_vnops.c
===================================================================
--- trunk/sys/ufs/ffs/ffs_vnops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/ffs_vnops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
* All rights reserved.
@@ -57,12 +58,12 @@
* SUCH DAMAGE.
*
* from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
- * from: $MidnightBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
+ * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
* @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ffs/ffs_vnops.c 284201 2015-06-10 02:14:33Z kib $");
#include <sys/param.h>
#include <sys/bio.h>
@@ -75,6 +76,7 @@
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
+#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
@@ -199,8 +201,8 @@
* bo_dirty list. Recheck and resync as needed.
*/
BO_LOCK(bo);
- if (vp->v_type == VREG && (bo->bo_numoutput > 0 ||
- bo->bo_dirty.bv_cnt > 0)) {
+ if ((vp->v_type == VREG || vp->v_type == VDIR) &&
+ (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
BO_UNLOCK(bo);
goto retry;
}
@@ -258,9 +260,17 @@
continue;
if (bp->b_lblkno > lbn)
panic("ffs_syncvnode: syncing truncated data.");
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
+ BO_UNLOCK(bo);
+ } else if (wait != 0) {
+ if (BUF_LOCK(bp,
+ LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+ BO_LOCKPTR(bo)) != 0) {
+ bp->b_vflags &= ~BV_SCANNED;
+ goto next;
+ }
+ } else
continue;
- BO_UNLOCK(bo);
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
/*
@@ -508,7 +518,8 @@
/*
* Don't do readahead if this is the end of the file.
*/
- error = bread(vp, lbn, size, NOCRED, &bp);
+ error = bread_gb(vp, lbn, size, NOCRED,
+ GB_UNMAPPED, &bp);
} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
/*
* Otherwise if we are allowed to cluster,
@@ -519,7 +530,7 @@
*/
error = cluster_read(vp, ip->i_size, lbn,
size, NOCRED, blkoffset + uio->uio_resid,
- seqcount, &bp);
+ seqcount, GB_UNMAPPED, &bp);
} else if (seqcount > 1) {
/*
* If we are NOT allowed to cluster, then
@@ -529,9 +540,9 @@
* arguments point to arrays of the size specified in
* the 6th argument.
*/
- int nextsize = blksize(fs, ip, nextlbn);
- error = breadn(vp, lbn,
- size, &nextlbn, &nextsize, 1, NOCRED, &bp);
+ u_int nextsize = blksize(fs, ip, nextlbn);
+ error = breadn_flags(vp, lbn, size, &nextlbn,
+ &nextsize, 1, NOCRED, GB_UNMAPPED, &bp);
} else {
/*
* Failing all of the above, just read what the
@@ -538,7 +549,8 @@
* user asked for. Interestingly, the same as
* the first option above.
*/
- error = bread(vp, lbn, size, NOCRED, &bp);
+ error = bread_gb(vp, lbn, size, NOCRED,
+ GB_UNMAPPED, &bp);
}
if (error) {
brelse(bp);
@@ -569,8 +581,13 @@
xfersize = size;
}
- error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset,
- (int)xfersize, uio);
+ if ((bp->b_flags & B_UNMAPPED) == 0) {
+ error = vn_io_fault_uiomove((char *)bp->b_data +
+ blkoffset, (int)xfersize, uio);
+ } else {
+ error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
+ (int)xfersize, uio);
+ }
if (error)
break;
@@ -612,7 +629,7 @@
}
if ((error == 0 || uio->uio_resid != orig_resid) &&
- (vp->v_mount->mnt_flag & MNT_NOATIME) == 0 &&
+ (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 &&
(ip->i_flag & IN_ACCESS) == 0) {
VI_LOCK(vp);
ip->i_flag |= IN_ACCESS;
@@ -701,6 +718,7 @@
flags = seqcount << BA_SEQSHIFT;
if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
flags |= IO_SYNC;
+ flags |= BA_UNMAPPED;
for (error = 0; uio->uio_resid > 0;) {
lbn = lblkno(fs, uio->uio_offset);
@@ -711,10 +729,10 @@
if (uio->uio_offset + xfersize > ip->i_size)
vnode_pager_setsize(vp, uio->uio_offset + xfersize);
- /*
+ /*
* We must perform a read-before-write if the transfer size
* does not cover the entire buffer.
- */
+ */
if (fs->fs_bsize > xfersize)
flags |= BA_CLRBUF;
else
@@ -740,8 +758,13 @@
if (size < xfersize)
xfersize = size;
- error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset,
- (int)xfersize, uio);
+ if ((bp->b_flags & B_UNMAPPED) == 0) {
+ error = vn_io_fault_uiomove((char *)bp->b_data +
+ blkoffset, (int)xfersize, uio);
+ } else {
+ error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
+ (int)xfersize, uio);
+ }
/*
* If the buffer is not already filled and we encounter an
* error while trying to fill it, we have to clear out any
@@ -784,7 +807,8 @@
} else if (xfersize + blkoffset == fs->fs_bsize) {
if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
bp->b_flags |= B_CLUSTEROK;
- cluster_write(vp, bp, ip->i_size, seqcount);
+ cluster_write(vp, bp, ip->i_size, seqcount,
+ GB_UNMAPPED);
} else {
bawrite(bp);
}
@@ -814,8 +838,7 @@
if (error) {
if (ioflag & IO_UNIT) {
(void)ffs_truncate(vp, osize,
- IO_NORMAL | (ioflag & IO_SYNC),
- ap->a_cred, uio->uio_td);
+ IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
@@ -844,7 +867,7 @@
* user programs might reference data beyond the actual end of file
 * occurring within the page. We have to zero that data.
*/
- VM_OBJECT_LOCK(mreq->object);
+ VM_OBJECT_WLOCK(mreq->object);
if (mreq->valid) {
if (mreq->valid != VM_PAGE_BITS_ALL)
vm_page_zero_invalid(mreq, TRUE);
@@ -855,10 +878,10 @@
vm_page_unlock(ap->a_m[i]);
}
}
- VM_OBJECT_UNLOCK(mreq->object);
+ VM_OBJECT_WUNLOCK(mreq->object);
return VM_PAGER_OK;
}
- VM_OBJECT_UNLOCK(mreq->object);
+ VM_OBJECT_WUNLOCK(mreq->object);
return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
ap->a_count,
@@ -942,7 +965,7 @@
* arguments point to arrays of the size specified in
* the 6th argument.
*/
- int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
+ u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
nextlbn = -1 - nextlbn;
error = breadn(vp, -1 - lbn,
@@ -1068,7 +1091,7 @@
/*
* We must perform a read-before-write if the transfer size
* does not cover the entire buffer.
- */
+ */
if (fs->fs_bsize > xfersize)
flags |= BA_CLRBUF;
else
@@ -1137,7 +1160,7 @@
if (error) {
if (ioflag & IO_UNIT) {
(void)ffs_truncate(vp, osize,
- IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
+ IO_EXT | (ioflag&IO_SYNC), ucred);
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
@@ -1204,7 +1227,8 @@
struct fs *fs;
struct uio luio;
struct iovec liovec;
- int easize, error;
+ u_int easize;
+ int error;
u_char *eae;
ip = VTOI(vp);
@@ -1328,7 +1352,7 @@
luio.uio_td = td;
/* XXX: I'm not happy about truncating to zero size */
if (ip->i_ea_len < dp->di_extsize)
- error = ffs_truncate(vp, 0, IO_EXT, cred, td);
+ error = ffs_truncate(vp, 0, IO_EXT, cred);
error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
}
if (--ip->i_ea_refs == 0) {
@@ -1384,12 +1408,7 @@
};
*/
{
- struct inode *ip;
- struct fs *fs;
- ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
-
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
@@ -1412,12 +1431,7 @@
};
*/
{
- struct inode *ip;
- struct fs *fs;
- ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
-
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
@@ -1530,13 +1544,11 @@
*/
{
struct inode *ip;
- struct fs *fs;
u_char *eae, *p;
unsigned easize;
int error, ealen;
ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
@@ -1585,7 +1597,6 @@
*/
{
struct inode *ip;
- struct fs *fs;
u_char *eae, *p, *pe, *pn;
unsigned easize;
uint32_t ul;
@@ -1592,7 +1603,6 @@
int error, ealen;
ip = VTOI(ap->a_vp);
- fs = ip->i_fs;
if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
return (EOPNOTSUPP);
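
The recurring pattern in the read and write hunks above is unmapped-buffer support: buffers are requested with GB_UNMAPPED (or BA_UNMAPPED through balloc), and the copy in or out of user space picks its primitive based on whether the buffer carries a kernel mapping. Condensed from the changes above:

    if ((bp->b_flags & B_UNMAPPED) == 0) {
        /* Buffer mapped into kernel VA: copy through b_data. */
        error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset,
            (int)xfersize, uio);
    } else {
        /* No mapping: copy page by page via the b_pages array. */
        error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
            (int)xfersize, uio);
    }

Skipping the transient kernel mapping is the point of the MNTK_UNMAPPED_BUFS flag set in ffs_mountfs() above.
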
Modified: trunk/sys/ufs/ffs/fs.h
===================================================================
--- trunk/sys/ufs/ffs/fs.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/fs.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -27,11 +28,11 @@
* SUCH DAMAGE.
*
* @(#)fs.h 8.13 (Berkeley) 3/21/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/fs.h 322860 2017-08-24 21:44:23Z mckusick $
*/
#ifndef _UFS_FFS_FS_H_
-#define _UFS_FFS_FS_H_
+#define _UFS_FFS_FS_H_
#include <sys/mount.h>
#include <ufs/ufs/dinode.h>
@@ -68,18 +69,18 @@
* given in byte-offset form, so they do not imply a sector size. The
* SBLOCKSEARCH specifies the order in which the locations should be searched.
*/
-#define SBLOCK_FLOPPY 0
-#define SBLOCK_UFS1 8192
-#define SBLOCK_UFS2 65536
-#define SBLOCK_PIGGY 262144
-#define SBLOCKSIZE 8192
-#define SBLOCKSEARCH \
+#define SBLOCK_FLOPPY 0
+#define SBLOCK_UFS1 8192
+#define SBLOCK_UFS2 65536
+#define SBLOCK_PIGGY 262144
+#define SBLOCKSIZE 8192
+#define SBLOCKSEARCH \
{ SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 }
/*
* Max number of fragments per block. This value is NOT tweakable.
*/
-#define MAXFRAG 8
+#define MAXFRAG 8
/*
* Addresses stored in inodes are capable of addressing fragments
@@ -109,7 +110,7 @@
* Note that super blocks are always of size SBLOCKSIZE,
* and that both SBLOCKSIZE and MAXBSIZE must be >= MINBSIZE.
*/
-#define MINBSIZE 4096
+#define MINBSIZE 4096
/*
* The path name on which the filesystem is mounted is maintained
@@ -116,13 +117,13 @@
* in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
* the super block for this name.
*/
-#define MAXMNTLEN 468
+#define MAXMNTLEN 468
/*
* The volume name for this filesystem is maintained in fs_volname.
* MAXVOLLEN defines the length of the buffer allocated.
*/
-#define MAXVOLLEN 32
+#define MAXVOLLEN 32
/*
* There is a 128-byte region in the superblock reserved for in-core
@@ -147,7 +148,7 @@
* value of fs_maxcontig. To conserve space, a maximum summary size
* is set by FS_MAXCONTIG.
*/
-#define FS_MAXCONTIG 16
+#define FS_MAXCONTIG 16
/*
* MINFREE gives the minimum acceptable percentage of filesystem
@@ -161,8 +162,8 @@
* default value. With 10% free space, fragmentation is not a
* problem, so we choose to optimize for time.
*/
-#define MINFREE 8
-#define DEFAULTOPT FS_OPTTIME
+#define MINFREE 8
+#define DEFAULTOPT FS_OPTTIME
/*
* Grigoriy Orlov <gluk at ptci.ru> has done some extensive work to fine
@@ -173,8 +174,8 @@
* filesystems, but may need to be tuned for odd cases like filesystems
* being used for squid caches or news spools.
*/
-#define AVFILESIZ 16384 /* expected average file size */
-#define AFPDIR 64 /* expected number of files per directory */
+#define AVFILESIZ 16384 /* expected average file size */
+#define AFPDIR 64 /* expected number of files per directory */
/*
* The maximum number of snapshot nodes that can be associated
@@ -184,7 +185,7 @@
* maintaining too many will slow the filesystem performance, so
* having this limit is a good idea.
*/
-#define FSMAXSNAP 20
+#define FSMAXSNAP 20
/*
* Used to identify special blocks in snapshots:
@@ -197,8 +198,8 @@
* identify blocks that are in use by other snapshots (which are
* expunged from this snapshot).
*/
-#define BLK_NOCOPY ((ufs2_daddr_t)(1))
-#define BLK_SNAP ((ufs2_daddr_t)(2))
+#define BLK_NOCOPY ((ufs2_daddr_t)(1))
+#define BLK_SNAP ((ufs2_daddr_t)(2))
/*
* Sysctl values for the fast filesystem.
@@ -214,7 +215,7 @@
#define FFS_ADJ_NIFREE 9 /* adjust number of free inodes */
#define FFS_ADJ_NFFREE 10 /* adjust number of free frags */
#define FFS_ADJ_NUMCLUSTERS 11 /* adjust number of free clusters */
-#define FFS_SET_CWD 12 /* set current directory */
+#define FFS_SET_CWD 12 /* set current directory */
#define FFS_SET_DOTDOT 13 /* set inode number for ".." */
#define FFS_UNLINK 14 /* remove a name in the filesystem */
#define FFS_SET_INODE 15 /* update an on-disk inode */
@@ -234,6 +235,20 @@
};
/*
+ * A recovery structure placed at the end of the boot block area by newfs
+ * that can be used by fsck to search for alternate superblocks.
+ */
+#define RESID (4096 - 20) /* disk sector size minus recovery area size */
+struct fsrecovery {
+ char block[RESID]; /* unused part of sector */
+ int32_t fsr_magic; /* magic number */
+ int32_t fsr_fsbtodb; /* fsbtodb and dbtofsb shift constant */
+ int32_t fsr_sblkno; /* offset of super-block in filesys */
+ int32_t fsr_fpg; /* blocks per group * fs_frag */
+ u_int32_t fsr_ncg; /* number of cylinder groups */
+};
+
+/*
* Per cylinder group information; summarized in blocks allocated
* from first cylinder group data blocks. These blocks have to be
* read in from fs_csaddr (size fs_cssize) in addition to the
@@ -343,7 +358,7 @@
ufs2_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */
int64_t fs_pendingblocks; /* (u) blocks being freed */
u_int32_t fs_pendinginodes; /* (u) inodes being freed */
- ino_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
+ uint32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
u_int32_t fs_avgfilesize; /* expected average file size */
u_int32_t fs_avgfpdir; /* expected # of files per directory */
int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */
@@ -377,14 +392,14 @@
#define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */
#define FS_BAD_MAGIC 0x19960408 /* UFS incomplete newfs magic number */
#define FS_OKAY 0x7c269d38 /* superblock checksum */
-#define FS_42INODEFMT -1 /* 4.2BSD inode format */
-#define FS_44INODEFMT 2 /* 4.4BSD inode format */
+#define FS_42INODEFMT -1 /* 4.2BSD inode format */
+#define FS_44INODEFMT 2 /* 4.4BSD inode format */
/*
* Preference for optimization.
*/
-#define FS_OPTTIME 0 /* minimize allocation time */
-#define FS_OPTSPACE 1 /* minimize disk fragmentation */
+#define FS_OPTTIME 0 /* minimize allocation time */
+#define FS_OPTSPACE 1 /* minimize disk fragmentation */
/*
* Filesystem flags.
@@ -414,16 +429,16 @@
* labels into extended attributes on the file system rather than maintain
* a single mount label for all objects.
*/
-#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */
-#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */
-#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */
+#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */
+#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */
+#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */
#define FS_SUJ 0x0008 /* Filesystem using softupdate journal */
-#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */
-#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */
-#define FS_GJOURNAL 0x0040 /* gjournaled file system */
-#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */
-#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */
-#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */
+#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */
+#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */
+#define FS_GJOURNAL 0x0040 /* gjournaled file system */
+#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */
+#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */
+#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */
#define FS_TRIM 0x0400 /* issue BIO_DELETE for deleted blocks */
/*
@@ -446,7 +461,7 @@
* Its size is derived from the size of the maps maintained in the
* cylinder group and the (struct cg) size.
*/
-#define CGSIZE(fs) \
+#define CGSIZE(fs) \
/* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \
/* old btotoff */ (fs)->fs_old_cpg * sizeof(int32_t) + \
/* old boff */ (fs)->fs_old_cpg * sizeof(u_int16_t) + \
@@ -459,12 +474,12 @@
/*
* The minimal number of cylinder groups that should be created.
*/
-#define MINCYLGRPS 4
+#define MINCYLGRPS 4
/*
* Convert cylinder group to base address of its global summary info.
*/
-#define fs_cs(fs, indx) fs_csp[indx]
+#define fs_cs(fs, indx) fs_csp[indx]
/*
* Cylinder group block for a filesystem.
@@ -504,14 +519,14 @@
/*
* Macros for access to cylinder group array structures
*/
-#define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC)
-#define cg_inosused(cgp) \
+#define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC)
+#define cg_inosused(cgp) \
((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff))
-#define cg_blksfree(cgp) \
+#define cg_blksfree(cgp) \
((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff))
-#define cg_clustersfree(cgp) \
+#define cg_clustersfree(cgp) \
((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff))
-#define cg_clustersum(cgp) \
+#define cg_clustersum(cgp) \
((int32_t *)((uintptr_t)(cgp) + (cgp)->cg_clustersumoff))
/*
@@ -532,7 +547,7 @@
#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */
#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */
#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */
-#define cgstart(fs, c) \
+#define cgstart(fs, c) \
((fs)->fs_magic == FS_UFS2_MAGIC ? cgbase(fs, c) : \
(cgbase(fs, c) + (fs)->fs_old_cgoffset * ((c) & ~((fs)->fs_old_cgmask))))
@@ -559,7 +574,7 @@
* Extract the bits for a block from a map.
* Compute the cylinder and rotational position of a cyl block addr.
*/
-#define blkmap(fs, map, loc) \
+#define blkmap(fs, map, loc) \
(((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag)))
/*
@@ -567,32 +582,32 @@
* quantities by using shifts and masks in place of divisions
* modulos and multiplications.
*/
-#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \
+#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \
((loc) & (fs)->fs_qbmask)
-#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \
+#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \
((loc) & (fs)->fs_qfmask)
-#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \
+#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \
(((off_t)(frag)) << (fs)->fs_fshift)
-#define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \
+#define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \
(((off_t)(blk)) << (fs)->fs_bshift)
/* Use this only when `blk' is known to be small, e.g., < NDADDR. */
-#define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \
+#define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \
((blk) << (fs)->fs_bshift)
-#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \
+#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \
((loc) >> (fs)->fs_bshift)
-#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \
+#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \
((loc) >> (fs)->fs_fshift)
-#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \
+#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \
(((size) + (fs)->fs_qbmask) & (fs)->fs_bmask)
-#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \
+#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \
(((size) + (fs)->fs_qfmask) & (fs)->fs_fmask)
-#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \
+#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \
((frags) >> (fs)->fs_fragshift)
-#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \
+#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \
((blks) << (fs)->fs_fragshift)
-#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \
+#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \
((fsb) & ((fs)->fs_frag - 1))
-#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \
+#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \
((fsb) &~ ((fs)->fs_frag - 1))
/*
@@ -599,7 +614,7 @@
* Determine the number of available frags given a
* percentage to hold in reserve.
*/
-#define freespace(fs, percentreserved) \
+#define freespace(fs, percentreserved) \
(blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \
(fs)->fs_cstotal.cs_nffree - \
(((off_t)((fs)->fs_dsize)) * (percentreserved) / 100))
@@ -607,11 +622,11 @@
/*
* Determining the size of a file block in the filesystem.
*/
-#define blksize(fs, ip, lbn) \
+#define blksize(fs, ip, lbn) \
(((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \
? (fs)->fs_bsize \
: (fragroundup(fs, blkoff(fs, (ip)->i_size))))
-#define sblksize(fs, size, lbn) \
+#define sblksize(fs, size, lbn) \
(((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \
? (fs)->fs_bsize \
: (fragroundup(fs, blkoff(fs, (size)))))
@@ -702,11 +717,11 @@
*/
struct jrefrec {
uint32_t jr_op;
- ino_t jr_ino;
- ino_t jr_parent;
+ uint32_t jr_ino;
+ uint32_t jr_parent;
uint16_t jr_nlink;
uint16_t jr_mode;
- off_t jr_diroff;
+ int64_t jr_diroff;
uint64_t jr_unused;
};
@@ -716,11 +731,11 @@
*/
struct jmvrec {
uint32_t jm_op;
- ino_t jm_ino;
- ino_t jm_parent;
+ uint32_t jm_ino;
+ uint32_t jm_parent;
uint16_t jm_unused;
- off_t jm_oldoff;
- off_t jm_newoff;
+ int64_t jm_oldoff;
+ int64_t jm_newoff;
};
/*
@@ -744,7 +759,7 @@
struct jtrncrec {
uint32_t jt_op;
uint32_t jt_ino;
- off_t jt_size;
+ int64_t jt_size;
uint32_t jt_extsize;
uint32_t jt_pad[3];
};
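
For reference, SBLOCKSEARCH above drives the usual superblock probe at mount time and in fsck: each candidate byte offset is tried in order until a plausible superblock turns up. A simplified sketch (read_super_at() is a hypothetical helper; the real loop also validates fs_bsize and fs_sblockloc before accepting a candidate):

    static const int sblock_try[] = SBLOCKSEARCH;
    struct fs *fs;
    int i;

    for (i = 0; sblock_try[i] != -1; i++) {
        /* Read SBLOCKSIZE bytes at byte offset sblock_try[i]. */
        fs = read_super_at(devvp, sblock_try[i]);  /* hypothetical */
        if (fs != NULL && (fs->fs_magic == FS_UFS2_MAGIC ||
            fs->fs_magic == FS_UFS1_MAGIC))
            break;          /* plausible superblock found */
    }

The new struct fsrecovery covers the case where all of these fail: newfs leaves just enough geometry at the end of the boot block area for fsck to compute where the alternate superblocks live.
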
Modified: trunk/sys/ufs/ffs/softdep.h
===================================================================
--- trunk/sys/ufs/ffs/softdep.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ffs/softdep.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -36,7 +37,7 @@
* SUCH DAMAGE.
*
* @(#)softdep.h 9.7 (McKusick) 6/21/00
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ffs/softdep.h 307534 2016-10-17 21:49:54Z mckusick $
*/
#include <sys/queue.h>
@@ -140,10 +141,44 @@
#define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */
#define UNLINKONLIST 0x200000 /* inodedep is in the unlinked list on disk */
#define UNLINKLINKS (UNLINKNEXT | UNLINKPREV)
+#define WRITESUCCEEDED 0x400000 /* the disk write completed successfully */
#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE)
/*
+ * Values for each of the soft dependency types.
+ */
+#define D_PAGEDEP 0
+#define D_INODEDEP 1
+#define D_BMSAFEMAP 2
+#define D_NEWBLK 3
+#define D_ALLOCDIRECT 4
+#define D_INDIRDEP 5
+#define D_ALLOCINDIR 6
+#define D_FREEFRAG 7
+#define D_FREEBLKS 8
+#define D_FREEFILE 9
+#define D_DIRADD 10
+#define D_MKDIR 11
+#define D_DIRREM 12
+#define D_NEWDIRBLK 13
+#define D_FREEWORK 14
+#define D_FREEDEP 15
+#define D_JADDREF 16
+#define D_JREMREF 17
+#define D_JMVREF 18
+#define D_JNEWBLK 19
+#define D_JFREEBLK 20
+#define D_JFREEFRAG 21
+#define D_JSEG 22
+#define D_JSEGDEP 23
+#define D_SBDEP 24
+#define D_JTRUNC 25
+#define D_JFSYNC 26
+#define D_SENTINEL 27
+#define D_LAST D_SENTINEL
+
+/*
* The workitem queue.
*
* It is sometimes useful and/or necessary to clean up certain dependencies
@@ -170,22 +205,22 @@
unsigned int wk_type:8, /* type of request */
wk_state:24; /* state flags */
};
-#define WK_DATA(wk) ((void *)(wk))
-#define WK_PAGEDEP(wk) ((struct pagedep *)(wk))
-#define WK_INODEDEP(wk) ((struct inodedep *)(wk))
-#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk))
+#define WK_DATA(wk) ((void *)(wk))
+#define WK_PAGEDEP(wk) ((struct pagedep *)(wk))
+#define WK_INODEDEP(wk) ((struct inodedep *)(wk))
+#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk))
#define WK_NEWBLK(wk) ((struct newblk *)(wk))
-#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk))
-#define WK_INDIRDEP(wk) ((struct indirdep *)(wk))
-#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk))
-#define WK_FREEFRAG(wk) ((struct freefrag *)(wk))
-#define WK_FREEBLKS(wk) ((struct freeblks *)(wk))
-#define WK_FREEWORK(wk) ((struct freework *)(wk))
-#define WK_FREEFILE(wk) ((struct freefile *)(wk))
-#define WK_DIRADD(wk) ((struct diradd *)(wk))
-#define WK_MKDIR(wk) ((struct mkdir *)(wk))
-#define WK_DIRREM(wk) ((struct dirrem *)(wk))
-#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk))
+#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk))
+#define WK_INDIRDEP(wk) ((struct indirdep *)(wk))
+#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk))
+#define WK_FREEFRAG(wk) ((struct freefrag *)(wk))
+#define WK_FREEBLKS(wk) ((struct freeblks *)(wk))
+#define WK_FREEWORK(wk) ((struct freework *)(wk))
+#define WK_FREEFILE(wk) ((struct freefile *)(wk))
+#define WK_DIRADD(wk) ((struct diradd *)(wk))
+#define WK_MKDIR(wk) ((struct mkdir *)(wk))
+#define WK_DIRREM(wk) ((struct dirrem *)(wk))
+#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk))
#define WK_JADDREF(wk) ((struct jaddref *)(wk))
#define WK_JREMREF(wk) ((struct jremref *)(wk))
#define WK_JMVREF(wk) ((struct jmvref *)(wk))
@@ -239,8 +274,8 @@
* list, any removed operations are done, and the dependency structure
* is freed.
*/
-#define DAHASHSZ 5
-#define DIRADDHASH(offset) (((offset) >> 2) % DAHASHSZ)
+#define DAHASHSZ 5
+#define DIRADDHASH(offset) (((offset) >> 2) % DAHASHSZ)
struct pagedep {
struct worklist pd_list; /* page buffer */
# define pd_state pd_list.wk_state /* check for multiple I/O starts */
@@ -330,8 +365,8 @@
struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */
} id_un;
};
-#define id_savedino1 id_un.idu_savedino1
-#define id_savedino2 id_un.idu_savedino2
+#define id_savedino1 id_un.idu_savedino1
+#define id_savedino2 id_un.idu_savedino2
/*
* A "bmsafemap" structure maintains a list of dependency structures
@@ -416,7 +451,8 @@
*/
struct allocdirect {
struct newblk ad_block; /* Common block logic */
-# define ad_state ad_block.nb_list.wk_state /* block pointer state */
+# define ad_list ad_block.nb_list /* block pointer worklist */
+# define ad_state ad_list.wk_state /* block pointer state */
TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */
struct inodedep *ad_inodedep; /* associated inodedep */
ufs2_daddr_t ad_oldblkno; /* old value of block pointer */
@@ -644,8 +680,8 @@
} da_un;
struct workhead da_jwork; /* Journal work awaiting completion. */
};
-#define da_previous da_un.dau_previous
-#define da_pagedep da_un.dau_pagedep
+#define da_previous da_un.dau_previous
+#define da_pagedep da_un.dau_pagedep
/*
* Two "mkdir" structures are needed to track the additional dependencies
@@ -701,8 +737,8 @@
} dm_un;
struct workhead dm_jwork; /* Journal work awaiting completion. */
};
-#define dm_pagedep dm_un.dmu_pagedep
-#define dm_dirinum dm_un.dmu_dirinum
+#define dm_pagedep dm_un.dmu_pagedep
+#define dm_dirinum dm_un.dmu_dirinum
/*
* A "newdirblk" structure tracks the progress of a newly allocated
@@ -947,3 +983,118 @@
struct fs *sb_fs; /* Filesystem pointer within buf. */
struct ufsmount *sb_ump; /* Our mount structure */
};
+
+/*
+ * Private journaling structures.
+ */
+struct jblocks {
+ struct jseglst jb_segs; /* TAILQ of current segments. */
+ struct jseg *jb_writeseg; /* Next write to complete. */
+ struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */
+ struct jextent *jb_extent; /* Extent array. */
+ uint64_t jb_nextseq; /* Next sequence number. */
+ uint64_t jb_oldestwrseq; /* Oldest written sequence number. */
+ uint8_t jb_needseg; /* Need a forced segment. */
+ uint8_t jb_suspended; /* Did journal suspend writes? */
+ int jb_avail; /* Available extents. */
+ int jb_used; /* Last used extent. */
+ int jb_head; /* Allocator head. */
+ int jb_off; /* Allocator extent offset. */
+ int jb_blocks; /* Total disk blocks covered. */
+ int jb_free; /* Total disk blocks free. */
+ int jb_min; /* Minimum free space. */
+ int jb_low; /* Low on space. */
+ int jb_age; /* Insertion time of oldest rec. */
+};
+
+struct jextent {
+ ufs2_daddr_t je_daddr; /* Disk block address. */
+ int je_blocks; /* Disk block count. */
+};
+
+/*
+ * Hash table declarations.
+ */
+LIST_HEAD(mkdirlist, mkdir);
+LIST_HEAD(pagedep_hashhead, pagedep);
+LIST_HEAD(inodedep_hashhead, inodedep);
+LIST_HEAD(newblk_hashhead, newblk);
+LIST_HEAD(bmsafemap_hashhead, bmsafemap);
+TAILQ_HEAD(indir_hashhead, freework);
+
+/*
+ * Per-filesystem soft dependency data.
+ * Allocated at mount and freed at unmount.
+ */
+struct mount_softdeps {
+ struct rwlock sd_fslock; /* softdep lock */
+ struct workhead sd_workitem_pending; /* softdep work queue */
+ struct worklist *sd_worklist_tail; /* Tail pointer for above */
+ struct workhead sd_journal_pending; /* journal work queue */
+ struct worklist *sd_journal_tail; /* Tail pointer for above */
+ struct jblocks *sd_jblocks; /* Journal block information */
+ struct inodedeplst sd_unlinked; /* Unlinked inodes */
+ struct bmsafemaphd sd_dirtycg; /* Dirty CGs */
+ struct mkdirlist sd_mkdirlisthd; /* Track mkdirs */
+ struct pagedep_hashhead *sd_pdhash; /* pagedep hash table */
+ u_long sd_pdhashsize; /* pagedep hash table size-1 */
+ long sd_pdnextclean; /* next hash bucket to clean */
+ struct inodedep_hashhead *sd_idhash; /* inodedep hash table */
+ u_long sd_idhashsize; /* inodedep hash table size-1 */
+ long sd_idnextclean; /* next hash bucket to clean */
+ struct newblk_hashhead *sd_newblkhash; /* newblk hash table */
+ u_long sd_newblkhashsize; /* newblk hash table size-1 */
+ struct bmsafemap_hashhead *sd_bmhash; /* bmsafemap hash table */
+ u_long sd_bmhashsize; /* bmsafemap hash table size-1*/
+ struct indir_hashhead *sd_indirhash; /* indir hash table */
+ u_long sd_indirhashsize; /* indir hash table size-1 */
+ int sd_on_journal; /* Items on the journal list */
+ int sd_on_worklist; /* Items on the worklist */
+ int sd_deps; /* Total dependency count */
+ int sd_accdeps; /* accumulated dep count */
+ int sd_req; /* Wakeup when deps hits 0. */
+ int sd_flags; /* comm with flushing thread */
+ int sd_cleanups; /* Calls to cleanup */
+ struct thread *sd_flushtd; /* thread handling flushing */
+ TAILQ_ENTRY(mount_softdeps) sd_next; /* List of softdep filesystem */
+ struct ufsmount *sd_ump; /* our ufsmount structure */
+ u_long sd_curdeps[D_LAST + 1]; /* count of current deps */
+};
+/*
+ * Flags for communicating with the syncer thread.
+ */
+#define FLUSH_EXIT 0x0001 /* time to exit */
+#define FLUSH_CLEANUP 0x0002 /* need to clear out softdep structures */
+#define FLUSH_STARTING 0x0004 /* flush thread not yet started */
+
+/*
+ * Keep the old names from when these were in the ufsmount structure.
+ */
+#define softdep_workitem_pending um_softdep->sd_workitem_pending
+#define softdep_worklist_tail um_softdep->sd_worklist_tail
+#define softdep_journal_pending um_softdep->sd_journal_pending
+#define softdep_journal_tail um_softdep->sd_journal_tail
+#define softdep_jblocks um_softdep->sd_jblocks
+#define softdep_unlinked um_softdep->sd_unlinked
+#define softdep_dirtycg um_softdep->sd_dirtycg
+#define softdep_mkdirlisthd um_softdep->sd_mkdirlisthd
+#define pagedep_hashtbl um_softdep->sd_pdhash
+#define pagedep_hash_size um_softdep->sd_pdhashsize
+#define pagedep_nextclean um_softdep->sd_pdnextclean
+#define inodedep_hashtbl um_softdep->sd_idhash
+#define inodedep_hash_size um_softdep->sd_idhashsize
+#define inodedep_nextclean um_softdep->sd_idnextclean
+#define newblk_hashtbl um_softdep->sd_newblkhash
+#define newblk_hash_size um_softdep->sd_newblkhashsize
+#define bmsafemap_hashtbl um_softdep->sd_bmhash
+#define bmsafemap_hash_size um_softdep->sd_bmhashsize
+#define indir_hashtbl um_softdep->sd_indirhash
+#define indir_hash_size um_softdep->sd_indirhashsize
+#define softdep_on_journal um_softdep->sd_on_journal
+#define softdep_on_worklist um_softdep->sd_on_worklist
+#define softdep_deps um_softdep->sd_deps
+#define softdep_accdeps um_softdep->sd_accdeps
+#define softdep_req um_softdep->sd_req
+#define softdep_flags um_softdep->sd_flags
+#define softdep_flushtd um_softdep->sd_flushtd
+#define softdep_curdeps um_softdep->sd_curdeps
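
The D_* indices and the sd_curdeps[D_LAST + 1] array above move the softdep accounting from global state to per-mount bookkeeping under sd_fslock. A sketch of how a work item's type feeds those counters, modeled on workitem_alloc() in ffs_softdep.c (simplified; locking omitted):

    static void
    workitem_alloc(struct worklist *item, int type, struct mount *mp)
    {
        struct ufsmount *ump;

        ump = VFSTOUFS(mp);
        item->wk_type = type;           /* one of the D_* values */
        item->wk_state = 0;
        ump->softdep_deps++;            /* sd_deps via the macros above */
        ump->softdep_curdeps[type]++;   /* per-type live count */
    }
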
Modified: trunk/sys/ufs/ufs/README.acls
===================================================================
--- trunk/sys/ufs/ufs/README.acls 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/README.acls 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,4 +1,4 @@
-$MidnightBSD$
+$FreeBSD: stable/10/sys/ufs/ufs/README.acls 105456 2002-10-19 16:09:16Z rwatson $
UFS Access Control Lists Copyright
Property changes on: trunk/sys/ufs/ufs/README.acls
___________________________________________________________________
Added: mnbsd:nokeywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/ufs/ufs/README.extattr
===================================================================
--- trunk/sys/ufs/ufs/README.extattr 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/README.extattr 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,4 +1,4 @@
-$MidnightBSD$
+$FreeBSD: stable/10/sys/ufs/ufs/README.extattr 105417 2002-10-18 21:11:36Z rwatson $
UFS Extended Attributes Copyright
Property changes on: trunk/sys/ufs/ufs/README.extattr
___________________________________________________________________
Added: mnbsd:nokeywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/ufs/ufs/acl.h
===================================================================
--- trunk/sys/ufs/ufs/acl.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/acl.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2001 Robert N. M. Watson
* All rights reserved.
@@ -25,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/acl.h 200796 2009-12-21 19:39:10Z trasz $
*/
/*
* Developed by the TrustedBSD Project.
Modified: trunk/sys/ufs/ufs/dinode.h
===================================================================
--- trunk/sys/ufs/ufs/dinode.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/dinode.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Networks Associates Technology, Inc.
* All rights reserved.
@@ -62,7 +63,7 @@
* SUCH DAMAGE.
*
* @(#)dinode.h 8.3 (Berkeley) 1/21/94
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/dinode.h 259223 2013-12-11 19:25:17Z pfg $
*/
#ifndef _UFS_UFS_DINODE_H_
@@ -138,15 +139,15 @@
int32_t di_atimensec; /* 68: Last access time. */
int32_t di_ctimensec; /* 72: Last inode change time. */
int32_t di_birthnsec; /* 76: Inode creation time. */
- int32_t di_gen; /* 80: Generation number. */
+ u_int32_t di_gen; /* 80: Generation number. */
u_int32_t di_kernflags; /* 84: Kernel flags. */
u_int32_t di_flags; /* 88: Status flags (chflags). */
- int32_t di_extsize; /* 92: External attributes block. */
+ u_int32_t di_extsize; /* 92: External attributes size. */
ufs2_daddr_t di_extb[NXADDR];/* 96: External attributes block. */
ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */
ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */
u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */
- ino_t di_freelink; /* 240: SUJ: Next unlinked inode. */
+ uint32_t di_freelink; /* 240: SUJ: Next unlinked inode. */
uint32_t di_spare[3]; /* 244: Reserved; currently unused */
};
@@ -168,7 +169,7 @@
struct ufs1_dinode {
u_int16_t di_mode; /* 0: IFMT, permissions; see below. */
int16_t di_nlink; /* 2: File link count. */
- ino_t di_freelink; /* 4: SUJ: Next unlinked inode. */
+ uint32_t di_freelink; /* 4: SUJ: Next unlinked inode. */
u_int64_t di_size; /* 8: File byte count. */
int32_t di_atime; /* 16: Last access time. */
int32_t di_atimensec; /* 20: Last access time. */
@@ -179,8 +180,8 @@
ufs1_daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */
ufs1_daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */
u_int32_t di_flags; /* 100: Status flags (chflags). */
- int32_t di_blocks; /* 104: Blocks actually held. */
- int32_t di_gen; /* 108: Generation number. */
+ u_int32_t di_blocks; /* 104: Blocks actually held. */
+ u_int32_t di_gen; /* 108: Generation number. */
u_int32_t di_uid; /* 112: File owner. */
u_int32_t di_gid; /* 116: File group. */
u_int64_t di_modrev; /* 120: i_modrev for NFSv4 */
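
The dinode edits above replace ino_t and signed fields with explicitly sized unsigned integers: both structures describe the on-disk format, so their layout must stay fixed even as kernel typedefs such as ino_t widen. The byte offsets in the comments pin the totals, which the kernel can verify at compile time; a minimal sketch of such an assertion:

    /* On-disk inode sizes are part of the format and must not drift. */
    CTASSERT(sizeof(struct ufs1_dinode) == 128);
    CTASSERT(sizeof(struct ufs2_dinode) == 256);
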
Modified: trunk/sys/ufs/ufs/dir.h
===================================================================
--- trunk/sys/ufs/ufs/dir.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/dir.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -32,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)dir.h 8.2 (Berkeley) 1/21/94
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/dir.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_DIR_H_
@@ -44,7 +45,7 @@
* quantity to keep down the cost of doing lookup on a 32-bit machine.
*/
#define doff_t int32_t
-#define MAXDIRSIZE (0x7fffffff)
+#define MAXDIRSIZE (0x7fffffff)
/*
* A directory consists of some number of blocks of DIRBLKSIZ
@@ -71,7 +72,7 @@
* Entries other than the first in a directory do not normally have
* dp->d_ino set to 0.
*/
-#define DIRBLKSIZ DEV_BSIZE
+#define DIRBLKSIZ DEV_BSIZE
#define MAXNAMLEN 255
struct direct {
@@ -113,14 +114,14 @@
(((uintptr_t)&((struct direct *)0)->d_name + \
((namlen)+1)*sizeof(((struct direct *)0)->d_name[0]) + 3) & ~3)
#if (BYTE_ORDER == LITTLE_ENDIAN)
-#define DIRSIZ(oldfmt, dp) \
+#define DIRSIZ(oldfmt, dp) \
((oldfmt) ? DIRECTSIZ((dp)->d_type) : DIRECTSIZ((dp)->d_namlen))
#else
-#define DIRSIZ(oldfmt, dp) \
+#define DIRSIZ(oldfmt, dp) \
DIRECTSIZ((dp)->d_namlen)
#endif
-#define OLDDIRFMT 1
-#define NEWDIRFMT 0
+#define OLDDIRFMT 1
+#define NEWDIRFMT 0
/*
* Template for manipulating directories. Should use struct direct's,
Modified: trunk/sys/ufs/ufs/dirhash.h
===================================================================
--- trunk/sys/ufs/ufs/dirhash.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/dirhash.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Ian Dowse. All rights reserved.
*
@@ -22,11 +23,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/dirhash.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_DIRHASH_H_
-#define _UFS_UFS_DIRHASH_H_
+#define _UFS_UFS_DIRHASH_H_
#include <sys/_lock.h>
#include <sys/_sx.h>
@@ -44,11 +45,11 @@
* We also maintain information about free space in each block
* to speed up creations.
*/
-#define DIRHASH_EMPTY (-1) /* entry unused */
-#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */
+#define DIRHASH_EMPTY (-1) /* entry unused */
+#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */
-#define DIRALIGN 4
-#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN)
+#define DIRALIGN 4
+#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN)
/* max DIRALIGN words in a directory entry */
/*
@@ -68,18 +69,18 @@
* case it limits the number of hash builds to 1/DH_SCOREINIT of the
* number of accesses.
*/
-#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */
-#define DH_SCOREMAX 64 /* max dh_score value */
+#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */
+#define DH_SCOREMAX 64 /* max dh_score value */
/*
* The main hash table has 2 levels. It is an array of pointers to
* blocks of DH_NBLKOFF offsets.
*/
-#define DH_BLKOFFSHIFT 8
-#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT)
-#define DH_BLKOFFMASK (DH_NBLKOFF - 1)
+#define DH_BLKOFFSHIFT 8
+#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT)
+#define DH_BLKOFFMASK (DH_NBLKOFF - 1)
-#define DH_ENTRY(dh, slot) \
+#define DH_ENTRY(dh, slot) \
((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK])
struct dirhash {
Modified: trunk/sys/ufs/ufs/extattr.h
===================================================================
--- trunk/sys/ufs/ufs/extattr.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/extattr.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2001 Robert N. M. Watson
* All rights reserved.
@@ -25,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/extattr.h 262779 2014-03-05 04:23:19Z pfg $
*/
/*
* Developed by the TrustedBSD Project.
@@ -94,14 +95,14 @@
* attribute name to calculate and set the ea_length, ea_namelength,
* and ea_contentpadlen fields of the extended attribute structure.
*/
-#define EXTATTR_NEXT(eap) \
+#define EXTATTR_NEXT(eap) \
((struct extattr *)(((void *)(eap)) + (eap)->ea_length))
-#define EXTATTR_CONTENT(eap) (((void *)(eap)) + EXTATTR_BASE_LENGTH(eap))
-#define EXTATTR_CONTENT_SIZE(eap) \
+#define EXTATTR_CONTENT(eap) (((void *)(eap)) + EXTATTR_BASE_LENGTH(eap))
+#define EXTATTR_CONTENT_SIZE(eap) \
((eap)->ea_length - EXTATTR_BASE_LENGTH(eap) - (eap)->ea_contentpadlen)
-#define EXTATTR_BASE_LENGTH(eap) \
+#define EXTATTR_BASE_LENGTH(eap) \
((sizeof(struct extattr) + (eap)->ea_namelength + 7) & ~7)
-#define EXTATTR_SET_LENGTHS(eap, contentsize) do { \
+#define EXTATTR_SET_LENGTHS(eap, contentsize) do { \
KASSERT(((eap)->ea_name[0] != 0), \
("Must initialize name before setting lengths")); \
(eap)->ea_namelength = strlen((eap)->ea_name); \
@@ -115,10 +116,6 @@
#include <sys/_sx.h>
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_EXTATTR);
-#endif
-
struct vnode;
LIST_HEAD(ufs_extattr_list_head, ufs_extattr_list_entry);
struct ufs_extattr_list_entry {
@@ -152,7 +149,7 @@
#else
/* User-level definition of KASSERT for macros above */
-#define KASSERT(cond, str) do { \
+#define KASSERT(cond, str) do { \
if (!(cond)) { printf("panic: "); printf(str); printf("\n"); exit(1); }\
} while (0)
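
The EXTATTR_* macros above define how the packed attribute records are traversed in an extended attribute area. A sketch of a lookup loop modeled on the kernel's use of these macros (start, end, attrnamespace, and name are assumed inputs; bounds checking simplified):

    struct extattr *eap;
    void *content;

    for (eap = (struct extattr *)start; eap < (struct extattr *)end;
        eap = EXTATTR_NEXT(eap)) {
            if (eap->ea_namespace != attrnamespace ||
                eap->ea_namelength != strlen(name) ||
                strncmp(eap->ea_name, name, eap->ea_namelength) != 0)
                    continue;
            content = EXTATTR_CONTENT(eap);
            /* ... copy out EXTATTR_CONTENT_SIZE(eap) bytes ... */
            break;
    }
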
Modified: trunk/sys/ufs/ufs/gjournal.h
===================================================================
--- trunk/sys/ufs/ufs/gjournal.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/gjournal.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
* All rights reserved.
@@ -23,11 +24,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/gjournal.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_GJOURNAL_H_
-#define _UFS_UFS_GJOURNAL_H_
+#define _UFS_UFS_GJOURNAL_H_
/*
* GEOM journal function prototypes.
Modified: trunk/sys/ufs/ufs/inode.h
===================================================================
--- trunk/sys/ufs/ufs/inode.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/inode.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -32,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)inode.h 8.9 (Berkeley) 5/14/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/inode.h 283640 2015-05-28 00:11:36Z mckusick $
*/
#ifndef _UFS_UFS_INODE_H_
@@ -87,6 +88,8 @@
daddr_t *snapblklist; /* Collect expunged snapshot blocks. */
} i_un;
+ int i_nextclustercg; /* last cg searched for cluster */
+
/*
* Data for extended attribute modification.
*/
@@ -102,7 +105,7 @@
int16_t i_nlink; /* File link count. */
u_int64_t i_size; /* File byte count. */
u_int32_t i_flags; /* Status flags (chflags). */
- int64_t i_gen; /* Generation number. */
+ u_int64_t i_gen; /* Generation number. */
u_int32_t i_uid; /* File owner. */
u_int32_t i_gid; /* File group. */
/*
@@ -129,12 +132,12 @@
#define IN_TRUNCATED 0x0800 /* Journaled truncation pending. */
-#define i_devvp i_ump->um_devvp
-#define i_umbufobj i_ump->um_bo
-#define i_dirhash i_un.dirhash
-#define i_snapblklist i_un.snapblklist
-#define i_din1 dinode_u.din1
-#define i_din2 dinode_u.din2
+#define i_devvp i_ump->um_devvp
+#define i_umbufobj i_ump->um_bo
+#define i_dirhash i_un.dirhash
+#define i_snapblklist i_un.snapblklist
+#define i_din1 dinode_u.din1
+#define i_din2 dinode_u.din2
#ifdef _KERNEL
/*
@@ -154,7 +157,7 @@
#define SHORTLINK(ip) \
(((ip)->i_ump->um_fstype == UFS1) ? \
(caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db)
-#define IS_SNAPSHOT(ip) ((ip)->i_flags & SF_SNAPSHOT)
+#define IS_SNAPSHOT(ip) ((ip)->i_flags & SF_SNAPSHOT)
/*
* Structure used to pass around logical block paths generated by
@@ -166,21 +169,21 @@
};
/* Convert between inode pointers and vnode pointers. */
-#define VTOI(vp) ((struct inode *)(vp)->v_data)
-#define ITOV(ip) ((ip)->i_vnode)
+#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
/* Determine if soft dependencies are being done */
-#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
-#define MOUNTEDSOFTDEP(mp) ((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
-#define DOINGSUJ(vp) ((vp)->v_mount->mnt_flag & MNT_SUJ)
-#define MOUNTEDSUJ(mp) ((mp)->mnt_flag & MNT_SUJ)
+#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
+#define MOUNTEDSOFTDEP(mp) ((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
+#define DOINGSUJ(vp) ((vp)->v_mount->mnt_flag & MNT_SUJ)
+#define MOUNTEDSUJ(mp) ((mp)->mnt_flag & MNT_SUJ)
/* This overlays the fid structure (see mount.h). */
struct ufid {
u_int16_t ufid_len; /* Length of structure. */
u_int16_t ufid_pad; /* Force 32-bit alignment. */
- ino_t ufid_ino; /* File number (ino). */
- int32_t ufid_gen; /* Generation number. */
+ uint32_t ufid_ino; /* File number (ino). */
+ uint32_t ufid_gen; /* Generation number. */
};
#endif /* _KERNEL */
Modified: trunk/sys/ufs/ufs/quota.h
===================================================================
--- trunk/sys/ufs/ufs/quota.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/quota.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)quota.h 8.3 (Berkeley) 8/19/94
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/quota.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_QUOTA_H_
@@ -62,7 +63,7 @@
/*
* Definitions for the default names of the quotas files.
*/
-#define INITQFNAMES { \
+#define INITQFNAMES { \
"user", /* USRQUOTA */ \
"group", /* GRPQUOTA */ \
"undefined", \
@@ -75,8 +76,8 @@
* broken into a main command defined below and a subcommand that is used
* to convey the type of quota that is being manipulated (see above).
*/
-#define SUBCMDMASK 0x00ff
-#define SUBCMDSHIFT 8
+#define SUBCMDMASK 0x00ff
+#define SUBCMDSHIFT 8
#define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK))
#define Q_QUOTAON 0x0100 /* enable quotas */
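Context for the SUBCMD* constants above: QCMD() packs the main command into the high bits and the quota type into the low byte, e.g. QCMD(Q_QUOTAON, USRQUOTA) == (0x0100 << 8) | 0 == 0x010000, and the kernel recovers the halves by shifting and masking. A quick round-trip check (constants copied from this header; USRQUOTA is 0 in quota.h):

    #include <stdio.h>

    #define SUBCMDMASK      0x00ff
    #define SUBCMDSHIFT     8
    #define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK))
    #define Q_QUOTAON       0x0100  /* enable quotas */
    #define USRQUOTA        0       /* user quota type */

    int
    main(void)
    {
            int cmds = QCMD(Q_QUOTAON, USRQUOTA);

            /* Unpack the same way the quotactl path does. */
            printf("packed 0x%x -> cmd 0x%x, type %d\n", cmds,
                cmds >> SUBCMDSHIFT, cmds & SUBCMDMASK);
            return (0);
    }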
@@ -119,10 +120,10 @@
int64_t dqb_itime; /* time limit for excessive files */
};
-#define dqblk dqblk64
+#define dqblk dqblk64
-#define Q_DQHDR64_MAGIC "QUOTA64"
-#define Q_DQHDR64_VERSION 0x20081104
+#define Q_DQHDR64_MAGIC "QUOTA64"
+#define Q_DQHDR64_VERSION 0x20081104
struct dqhdr64 {
char dqh_magic[8]; /* Q_DQHDR64_MAGIC */
Modified: trunk/sys/ufs/ufs/ufs_acl.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_acl.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_acl.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2003 Robert N. M. Watson
* All rights reserved.
@@ -31,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_acl.c 241011 2012-09-27 23:30:49Z mdf $");
#include "opt_ufs.h"
#include "opt_quota.h"
@@ -182,8 +183,8 @@
* are unsafe.
*/
printf("ufs_getacl_nfs4(): Loaded invalid ACL ("
- "%d bytes), inumber %d on %s\n", len,
- ip->i_number, ip->i_fs->fs_fsmnt);
+ "%d bytes), inumber %ju on %s\n", len,
+ (uintmax_t)ip->i_number, ip->i_fs->fs_fsmnt);
return (EPERM);
}
@@ -191,8 +192,8 @@
error = acl_nfs4_check(aclp, vp->v_type == VDIR);
if (error) {
printf("ufs_getacl_nfs4(): Loaded invalid ACL "
- "(failed acl_nfs4_check), inumber %d on %s\n",
- ip->i_number, ip->i_fs->fs_fsmnt);
+ "(failed acl_nfs4_check), inumber %ju on %s\n",
+ (uintmax_t)ip->i_number, ip->i_fs->fs_fsmnt);
return (EPERM);
}
@@ -259,8 +260,8 @@
* DAC protections are unsafe.
*/
printf("ufs_get_oldacl(): Loaded invalid ACL "
- "(len = %d), inumber %d on %s\n", len,
- ip->i_number, ip->i_fs->fs_fsmnt);
+ "(len = %d), inumber %ju on %s\n", len,
+ (uintmax_t)ip->i_number, ip->i_fs->fs_fsmnt);
return (EPERM);
}
Modified: trunk/sys/ufs/ufs/ufs_bmap.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_bmap.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_bmap.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_bmap.c 284021 2015-06-05 08:36:25Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -114,7 +115,6 @@
struct buf *bp;
struct ufsmount *ump;
struct mount *mp;
- struct vnode *devvp;
struct indir a[NIADDR+1], *ap;
ufs2_daddr_t daddr;
ufs_lbn_t metalbn;
@@ -125,7 +125,6 @@
ip = VTOI(vp);
mp = vp->v_mount;
ump = VFSTOUFS(mp);
- devvp = ump->um_devvp;
if (runp) {
maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
Modified: trunk/sys/ufs/ufs/ufs_dirhash.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_dirhash.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_dirhash.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001, 2002 Ian Dowse. All rights reserved.
*
@@ -28,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.4 2012/09/14 23:16:21 laffer1 Exp $");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_dirhash.c 326846 2017-12-14 11:45:02Z kib $");
#include "opt_ufs.h"
@@ -189,11 +190,11 @@
struct dirhash *ndh;
struct dirhash *dh;
struct vnode *vp;
- int error;
+ bool excl;
- error = 0;
ndh = dh = NULL;
vp = ip->i_vnode;
+ excl = false;
for (;;) {
/* Racy check for i_dirhash to prefetch a dirhash structure. */
if (ip->i_dirhash == NULL && ndh == NULL) {
@@ -230,8 +231,11 @@
ufsdirhash_hold(dh);
VI_UNLOCK(vp);
- /* Acquire a shared lock on existing hashes. */
- sx_slock(&dh->dh_lock);
+ /* Acquire a lock on existing hashes. */
+ if (excl)
+ sx_xlock(&dh->dh_lock);
+ else
+ sx_slock(&dh->dh_lock);
/* The hash could've been recycled while we were waiting. */
VI_LOCK(vp);
@@ -252,9 +256,10 @@
* so we can recreate it. If we fail the upgrade, drop our
* lock and try again.
*/
- if (sx_try_upgrade(&dh->dh_lock))
+ if (excl || sx_try_upgrade(&dh->dh_lock))
break;
sx_sunlock(&dh->dh_lock);
+ excl = true;
}
/* Free the preallocated structure if it was not necessary. */
if (ndh) {
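The excl flag introduced above is the interesting part of this hunk: the dirhash is taken shared for the common case, and when the code discovers it must rebuild the hash it attempts sx_try_upgrade(); if that fails it drops the lock and retries the whole loop, this time asking for the exclusive lock up front. A hedged pthread rendering of the same shape (the kernel uses sx(9) locks, which unlike pthreads offer a try-upgrade primitive):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_rwlock_t dh_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Returns with dh_lock held exclusively once modification is needed.
       pthreads cannot upgrade read -> write, so the kernel loop's
       fallback path (drop, then retry exclusive) is the only path here. */
    static void
    lock_for_rebuild(void)
    {
            bool excl = false;

            for (;;) {
                    if (excl)
                            pthread_rwlock_wrlock(&dh_lock);
                    else
                            pthread_rwlock_rdlock(&dh_lock);
                    if (excl)
                            return;
                    /* The shared fast path would run here; suppose we
                       now find the hash must be rebuilt. */
                    pthread_rwlock_unlock(&dh_lock);
                    excl = true;
            }
    }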
@@ -273,11 +278,9 @@
ufsdirhash_acquire(struct inode *ip)
{
struct dirhash *dh;
- struct vnode *vp;
ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);
- vp = ip->i_vnode;
dh = ip->i_dirhash;
if (dh == NULL)
return (NULL);
@@ -1248,7 +1251,12 @@
{
struct dirhash *dh, *dh_temp;
int memfreed = 0;
- /* XXX: this 10% may need to be adjusted */
+ /*
+ * Will free a *minimum* of 10% of the dirhash, but possibly much
+ * more (depending on dirhashreclaimage). Systems with large dirhashes
+ * probably also need a much larger dirhashreclaimage.
+ * XXX: this percentage may need to be adjusted.
+ */
int memwanted = ufs_dirhashmem / 10;
ufs_dirhashlowmemcount++;
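To make the reworded comment concrete: with ufs_dirhashmem at 2 MB, the floor freed per lowmem event is 2097152 / 10 = 209715 bytes; hashes unused for longer than dirhashreclaimage are reclaimed first, which can push the total well past that floor.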
Modified: trunk/sys/ufs/ufs/ufs_extattr.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_extattr.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_extattr.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999-2002 Robert N. M. Watson
* Copyright (c) 2002-2003 Networks Associates Technology, Inc.
@@ -38,7 +39,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_extattr.c 302233 2016-06-27 21:44:27Z bdrewery $");
#include "opt_ufs.h"
@@ -69,6 +70,8 @@
#ifdef UFS_EXTATTR
+FEATURE(ufs_extattr, "ufs extended attribute support");
+
static MALLOC_DEFINE(M_UFS_EXTATTR, "ufs_extattr", "ufs extended attribute");
static int ufs_extattr_sync = 0;
@@ -108,7 +111,7 @@
* backing file anyway.
*/
static void
-ufs_extattr_uepm_lock(struct ufsmount *ump, struct thread *td)
+ufs_extattr_uepm_lock(struct ufsmount *ump)
{
sx_xlock(&ump->um_extattr.uepm_lock);
@@ -115,7 +118,7 @@
}
static void
-ufs_extattr_uepm_unlock(struct ufsmount *ump, struct thread *td)
+ufs_extattr_uepm_unlock(struct ufsmount *ump)
{
sx_xunlock(&ump->um_extattr.uepm_lock);
@@ -213,9 +216,9 @@
ump = VFSTOUFS(mp);
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_start_locked(ump, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -335,6 +338,8 @@
}
VOP_ADD_WRITECOUNT(vp, 1);
+ CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp,
+ vp->v_writecount);
vref(vp);
@@ -397,20 +402,8 @@
return (error);
}
- /*
- * XXXRW: While in UFS, we always get DIRBLKSIZ returns from
- * the directory code on success, on other file systems this
- * may not be the case. For portability, we should check the
- * read length on return from ufs_readdir().
- */
- edp = (struct dirent *)&dirbuf[DIRBLKSIZ];
+ edp = (struct dirent *)&dirbuf[DIRBLKSIZ - auio.uio_resid];
for (dp = (struct dirent *)dirbuf; dp < edp; ) {
-#if (BYTE_ORDER == LITTLE_ENDIAN)
- dp->d_type = dp->d_namlen;
- dp->d_namlen = 0;
-#else
- dp->d_type = 0;
-#endif
if (dp->d_reclen == 0)
break;
error = ufs_extattr_lookup(dvp, UE_GETDIR_LOCKPARENT,
@@ -456,9 +449,9 @@
int error;
ump = VFSTOUFS(mp);
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_autostart_locked(mp, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -564,7 +557,7 @@
struct ufsmount *ump = VFSTOUFS(mp);
int error = 0;
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
error = EOPNOTSUPP;
@@ -582,7 +575,7 @@
ump->um_extattr.uepm_ucred = NULL;
unlock:
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -607,8 +600,6 @@
attribute = malloc(sizeof(struct ufs_extattr_list_entry),
M_UFS_EXTATTR, M_WAITOK);
- if (attribute == NULL)
- return (ENOMEM);
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
error = EOPNOTSUPP;
@@ -781,10 +772,10 @@
* ufs_extattr_enable_with_open() will always unlock the
* vnode, regardless of failure.
*/
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_enable_with_open(ump, filename_vp,
attrnamespace, attrname, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
@@ -797,10 +788,10 @@
if (attrname == NULL)
return (EINVAL);
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_disable(ump, attrnamespace, attrname,
td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
@@ -830,12 +821,12 @@
struct ufsmount *ump = VFSTOUFS(mp);
int error;
- ufs_extattr_uepm_lock(ump, ap->a_td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_get(ap->a_vp, ap->a_attrnamespace, ap->a_name,
ap->a_uio, ap->a_size, ap->a_cred, ap->a_td);
- ufs_extattr_uepm_unlock(ump, ap->a_td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -932,8 +923,8 @@
* is to coerce this to undefined, and let it get cleaned
* up by the next write or extattrctl clean.
*/
- printf("ufs_extattr_get (%s): inode number inconsistency (%d, %jd)\n",
- mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (intmax_t)ip->i_gen);
+ printf("ufs_extattr_get (%s): inode number inconsistency (%d, %ju)\n",
+ mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (uintmax_t)ip->i_gen);
error = ENOATTR;
goto vopunlock_exit;
}
@@ -1000,13 +991,13 @@
struct ufsmount *ump = VFSTOUFS(mp);
int error;
- ufs_extattr_uepm_lock(ump, ap->a_td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_rm(ap->a_vp, ap->a_attrnamespace, ap->a_name,
ap->a_cred, ap->a_td);
- ufs_extattr_uepm_unlock(ump, ap->a_td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -1037,12 +1028,12 @@
if (ap->a_uio == NULL)
return (EINVAL);
- ufs_extattr_uepm_lock(ump, ap->a_td);
+ ufs_extattr_uepm_lock(ump);
error = ufs_extattr_set(ap->a_vp, ap->a_attrnamespace, ap->a_name,
ap->a_uio, ap->a_cred, ap->a_td);
- ufs_extattr_uepm_unlock(ump, ap->a_td);
+ ufs_extattr_uepm_unlock(ump);
return (error);
}
@@ -1293,10 +1284,10 @@
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED))
return;
- ufs_extattr_uepm_lock(ump, td);
+ ufs_extattr_uepm_lock(ump);
if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) {
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
return;
}
@@ -1304,7 +1295,7 @@
ufs_extattr_rm(vp, uele->uele_attrnamespace,
uele->uele_attrname, NULL, td);
- ufs_extattr_uepm_unlock(ump, td);
+ ufs_extattr_uepm_unlock(ump);
}
#endif /* !UFS_EXTATTR */
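One hunk above deserves a second look: the directory scan previously assumed a full DIRBLKSIZ was always returned and patched up the d_type/d_namlen fields in place on little-endian machines. Now that ufs_readdir() returns fully formed entries (see the rewrite further below), the fixup is obsolete, and the end pointer is derived from how much the read actually filled, since uio_resid holds the bytes left unread. A small sketch of that pointer arithmetic, with a hypothetical resid value:

    #include <stdio.h>

    #define DIRBLKSIZ 512           /* directory block size */

    int
    main(void)
    {
            char dirbuf[DIRBLKSIZ];
            size_t resid = 100;     /* bytes the read left unfilled */
            char *edp = &dirbuf[DIRBLKSIZ - resid];

            /* Entries are valid only in dirbuf[0 .. DIRBLKSIZ - resid). */
            printf("valid bytes: %td\n", edp - dirbuf);     /* 412 */
            return (0);
    }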
Modified: trunk/sys/ufs/ufs/ufs_extern.h
===================================================================
--- trunk/sys/ufs/ufs/ufs_extern.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_extern.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -27,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/ufs_extern.h 262779 2014-03-05 04:23:19Z pfg $
*/
#ifndef _UFS_UFS_EXTERN_H_
@@ -98,7 +99,6 @@
void softdep_setup_directory_change(struct buf *, struct inode *,
struct inode *, ino_t, int);
void softdep_change_linkcnt(struct inode *);
-void softdep_releasefile(struct inode *);
int softdep_slowdown(struct vnode *);
void softdep_setup_create(struct inode *, struct inode *);
void softdep_setup_dotdot_link(struct inode *, struct inode *);
@@ -107,7 +107,6 @@
void softdep_setup_rmdir(struct inode *, struct inode *);
void softdep_setup_unlink(struct inode *, struct inode *);
void softdep_revert_create(struct inode *, struct inode *);
-void softdep_revert_dotdot_link(struct inode *, struct inode *);
void softdep_revert_link(struct inode *, struct inode *);
void softdep_revert_mkdir(struct inode *, struct inode *);
void softdep_revert_rmdir(struct inode *, struct inode *);
@@ -119,10 +118,11 @@
* Note: The general vfs code typically limits the sequential heuristic
* count to 127. See sequential_heuristic() in kern/vfs_vnops.c
*/
-#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */
-#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
-#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */
-#define BA_SEQSHIFT 24
-#define BA_SEQMAX 0x7F
+#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */
+#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
+#define BA_UNMAPPED 0x00040000 /* Do not mmap resulted buffer. */
+#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */
+#define BA_SEQSHIFT 24
+#define BA_SEQMAX 0x7F
#endif /* !_UFS_UFS_EXTERN_H_ */
Modified: trunk/sys/ufs/ufs/ufs_gjournal.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_gjournal.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_gjournal.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_gjournal.c 306630 2016-10-03 10:15:16Z kib $");
#include "opt_ufs.h"
@@ -70,14 +71,17 @@
ino = ip->i_number;
cg = ino_to_cg(fs, ino);
- if (devvp->v_type != VCHR) {
+ if (devvp->v_type == VREG) {
/* devvp is a snapshot */
dev = VTOI(devvp)->i_devvp->v_rdev;
cgbno = fragstoblks(fs, cgtod(fs, cg));
- } else {
+ } else if (devvp->v_type == VCHR) {
/* devvp is a normal disk device */
dev = devvp->v_rdev;
cgbno = fsbtodb(fs, cgtod(fs, cg));
+ } else {
+ bp = NULL;
+ return (EIO);
}
if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
panic("ufs_gjournal_modref: range: dev = %s, ino = %lu, fs = %s",
Modified: trunk/sys/ufs/ufs/ufs_inode.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_inode.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_inode.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1995
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_inode.c 234612 2012-04-23 17:54:49Z trasz $");
#include "opt_quota.h"
#include "opt_ufs.h"
@@ -73,7 +74,6 @@
{
struct vnode *vp = ap->a_vp;
struct inode *ip = VTOI(vp);
- struct thread *td = ap->a_td;
mode_t mode;
int error = 0;
off_t isize;
@@ -129,8 +129,7 @@
if (ip->i_ump->um_fstype == UFS2)
isize += ip->i_din2->di_extsize;
if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
- error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
- NOCRED, td);
+ error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL, NOCRED);
if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
#ifdef QUOTA
if (!getinoquota(ip))
@@ -137,7 +136,7 @@
(void)chkiq(ip, -1, NOCRED, FORCE);
#endif
#ifdef UFS_EXTATTR
- ufs_extattr_vnode_inactive(vp, td);
+ ufs_extattr_vnode_inactive(vp, ap->a_td);
#endif
/*
* Setting the mode to zero needs to wait for the inode
@@ -173,7 +172,7 @@
* so that it can be reused immediately.
*/
if (ip->i_mode == 0)
- vrecycle(vp, td);
+ vrecycle(vp);
if (mp != NULL)
vn_finished_secondary_write(mp);
return (error);
Modified: trunk/sys/ufs/ufs/ufs_lookup.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_lookup.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_lookup.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_lookup.c 306180 2016-09-22 10:51:47Z kib $");
#include "opt_ufs.h"
#include "opt_quota.h"
@@ -76,33 +77,7 @@
/* true if old FS format...*/
#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0)
-#ifdef QUOTA
static int
-ufs_lookup_upgrade_lock(struct vnode *vp)
-{
- int error;
-
- ASSERT_VOP_LOCKED(vp, __FUNCTION__);
- if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
- return (0);
-
- error = 0;
-
- /*
- * Upgrade vnode lock, since getinoquota()
- * requires exclusive lock to modify inode.
- */
- vhold(vp);
- vn_lock(vp, LK_UPGRADE | LK_RETRY);
- VI_LOCK(vp);
- if (vp->v_iflag & VI_DOOMED)
- error = ENOENT;
- vdropl(vp);
- return (error);
-}
-#endif
-
-static int
ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred,
struct thread *td)
{
@@ -259,12 +234,25 @@
vnode_create_vobject(vdp, DIP(dp, i_size), cnp->cn_thread);
bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
-#ifdef QUOTA
- if ((nameiop == DELETE || nameiop == RENAME) && (flags & ISLASTCN)) {
- error = ufs_lookup_upgrade_lock(vdp);
- if (error != 0)
- return (error);
- }
+
+#ifdef DEBUG_VFS_LOCKS
+ /*
+ * Assert that the directory vnode is locked, and locked
+ * exclusively for the last component lookup for modifying
+ * operations.
+ *
+ * The directory-modifying operations need to save
+ * intermediate state in the inode between namei() call and
+ * actual directory manipulations. See fields in the struct
+ * inode marked as 'used during directory lookup'. We must
+ * ensure that upgrade in namei() does not happen, since
+ * upgrade might need to unlock vdp. If quotas are enabled,
+ * getinoquota() also requires exclusive lock to modify inode.
+ */
+ ASSERT_VOP_LOCKED(vdp, "ufs_lookup1");
+ if ((nameiop == CREATE || nameiop == DELETE || nameiop == RENAME) &&
+ (flags & (LOCKPARENT | ISLASTCN)) == (LOCKPARENT | ISLASTCN))
+ ASSERT_VOP_ELOCKED(vdp, "ufs_lookup2");
#endif
restart:
@@ -550,7 +538,7 @@
/*
* Insert name into cache (as non-existent) if appropriate.
*/
- if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
+ if ((cnp->cn_flags & MAKEENTRY) != 0)
cache_enter(vdp, NULL, cnp);
return (ENOENT);
@@ -770,11 +758,13 @@
mp = ITOV(ip)->v_mount;
if ((mp->mnt_flag & MNT_RDONLY) == 0)
- panic("ufs_dirbad: %s: bad dir ino %lu at offset %ld: %s",
- mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how);
+ panic("ufs_dirbad: %s: bad dir ino %ju at offset %ld: %s",
+ mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
+ (long)offset, how);
else
- (void)printf("%s: bad dir ino %lu at offset %ld: %s\n",
- mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how);
+ (void)printf("%s: bad dir ino %ju at offset %ld: %s\n",
+ mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
+ (long)offset, how);
}
/*
@@ -879,6 +869,7 @@
struct buf *bp;
u_int dsize;
struct direct *ep, *nep;
+ u_int64_t old_isize;
int error, ret, blkoff, loc, spacefree, flags, namlen;
char *dirbuf;
@@ -907,16 +898,19 @@
return (error);
}
#endif
+ old_isize = dp->i_size;
+ vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ);
if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
cr, flags, &bp)) != 0) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
bdwrite(newdirbp);
+ vnode_pager_setsize(dvp, (u_long)old_isize);
return (error);
}
dp->i_size = dp->i_offset + DIRBLKSIZ;
DIP_SET(dp, i_size, dp->i_size);
+ dp->i_endoff = dp->i_size;
dp->i_flag |= IN_CHANGE | IN_UPDATE;
- vnode_pager_setsize(dvp, (u_long)dp->i_size);
dirp->d_reclen = DIRBLKSIZ;
blkoff = dp->i_offset &
(VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
@@ -1128,12 +1122,16 @@
dp->i_endoff && dp->i_endoff < dp->i_size) {
if (tvp != NULL)
VOP_UNLOCK(tvp, 0);
+ error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
+ IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr);
+ if (error != 0)
+ vn_printf(dvp, "ufs_direnter: failed to truncate "
+ "err %d", error);
#ifdef UFS_DIRHASH
- if (dp->i_dirhash != NULL)
+ if (error == 0 && dp->i_dirhash != NULL)
ufsdirhash_dirtrunc(dp, dp->i_endoff);
#endif
- (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
- IO_NORMAL | IO_SYNC, cr, td);
+ error = 0;
if (tvp != NULL)
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
}
@@ -1209,8 +1207,8 @@
ufsdirhash_remove(dp, rep, dp->i_offset);
#endif
if (ip && rep->d_ino != ip->i_number)
- panic("ufs_dirremove: ip %d does not match dirent ino %d\n",
- ip->i_number, rep->d_ino);
+ panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n",
+ (uintmax_t)ip->i_number, (uintmax_t)rep->d_ino);
if (dp->i_count == 0) {
/*
* First entry in block: set d_ino to zero.
@@ -1251,7 +1249,8 @@
* drop its snapshot reference so that it will be reclaimed
* when last open reference goes away.
*/
- if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_effnlink == 0)
+ if (ip != NULL && (ip->i_flags & SF_SNAPSHOT) != 0 &&
+ ip->i_effnlink == 0)
UFS_SNAPGONE(ip);
return (error);
}
Modified: trunk/sys/ufs/ufs/ufs_quota.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_quota.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_quota.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1990, 1993, 1995
* The Regents of the University of California. All rights reserved.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_quota.c 306178 2016-09-22 10:47:56Z kib $");
#include "opt_ffs.h"
@@ -307,7 +308,6 @@
chkiq(struct inode *ip, int change, struct ucred *cred, int flags)
{
struct dquot *dq;
- ino_t ncurinodes;
int i, error, warn, do_check;
#ifdef DIAGNOSTIC
@@ -322,10 +322,8 @@
continue;
DQI_LOCK(dq);
DQI_WAIT(dq, PINOD+1, "chkiq1");
- ncurinodes = dq->dq_curinodes + change;
- /* XXX: ncurinodes is unsigned */
- if (dq->dq_curinodes != 0 && ncurinodes >= 0)
- dq->dq_curinodes = ncurinodes;
+ if (dq->dq_curinodes >= -change)
+ dq->dq_curinodes += change;
else
dq->dq_curinodes = 0;
dq->dq_flags &= ~DQ_INODS;
@@ -359,11 +357,8 @@
continue;
DQI_LOCK(dq);
DQI_WAIT(dq, PINOD+1, "chkiq3");
- ncurinodes = dq->dq_curinodes - change;
- /* XXX: ncurinodes is unsigned */
- if (dq->dq_curinodes != 0 &&
- ncurinodes >= 0)
- dq->dq_curinodes = ncurinodes;
+ if (dq->dq_curinodes >= change)
+ dq->dq_curinodes -= change;
else
dq->dq_curinodes = 0;
dq->dq_flags &= ~DQ_INODS;
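Both chkiq() hunks fix the same latent bug: dq_curinodes is unsigned, so the old ncurinodes >= 0 test could never fail and an over-release wrapped the counter to a huge value. The new form compares magnitudes before mutating, so the counter saturates at zero. A stand-alone version of the corrected arithmetic (unsigned long stands in for the real field type):

    #include <stdio.h>

    /* Apply a signed delta to an unsigned counter, clamping at zero,
       mirroring the corrected chkiq() logic. */
    static unsigned long
    apply_delta(unsigned long cur, long change)
    {
            if (change >= 0)
                    return (cur + change);
            /* Compare against the magnitude first; an over-release
               saturates instead of wrapping. */
            if (cur >= (unsigned long)-change)
                    return (cur + change);  /* wraps back correctly */
            return (0);
    }

    int
    main(void)
    {
            printf("%lu\n", apply_delta(3, -5));    /* 0, not huge */
            return (0);
    }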
@@ -497,20 +492,24 @@
struct vnode *vp, **vpp;
struct vnode *mvp;
struct dquot *dq;
- int error, flags, vfslocked;
+ int error, flags;
struct nameidata nd;
error = priv_check(td, PRIV_UFS_QUOTAON);
- if (error)
+ if (error != 0) {
+ vfs_unbusy(mp);
return (error);
+ }
- if (mp->mnt_flag & MNT_RDONLY)
+ if ((mp->mnt_flag & MNT_RDONLY) != 0) {
+ vfs_unbusy(mp);
return (EROFS);
+ }
ump = VFSTOUFS(mp);
dq = NODQUOT;
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, fname, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td);
flags = FREAD | FWRITE;
vfs_ref(mp);
vfs_unbusy(mp);
@@ -519,7 +518,6 @@
vfs_rel(mp);
return (error);
}
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
error = vfs_busy(mp, MBF_NOWAIT);
@@ -533,7 +531,6 @@
if (error != 0) {
VOP_UNLOCK(vp, 0);
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
@@ -542,7 +539,6 @@
UFS_UNLOCK(ump);
VOP_UNLOCK(vp, 0);
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
vfs_unbusy(mp);
return (EALREADY);
}
@@ -554,7 +550,6 @@
ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING);
UFS_UNLOCK(ump);
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
vfs_unbusy(mp);
return (error);
}
@@ -567,11 +562,23 @@
if (*vpp != vp)
quotaoff1(td, mp, type);
+ /*
+ * When the directory vnode containing the quota file is
+ * inactivated, due to the shared lookup of the quota file
+ * vput()ing the dvp, the qsyncvp() call for the containing
+ * directory would try to acquire the quota lock exclusive.
+ * At the same time, lookup already locked the quota vnode
+ * shared. Mark the quota vnode lock as allowing recursion
+ * and automatically converting shared locks to exclusive.
+ *
+ * Also mark quota vnode as system.
+ */
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vp->v_vflag |= VV_SYSTEM;
+ VN_LOCK_AREC(vp);
+ VN_LOCK_DSHARE(vp);
VOP_UNLOCK(vp, 0);
*vpp = vp;
- VFS_UNLOCK_GIANT(vfslocked);
/*
* Save the credential of the process that turned on quotas.
* Set up the time limits for this quota.
@@ -643,7 +650,6 @@
struct dquot *dq;
struct inode *ip;
struct ucred *cr;
- int vfslocked;
int error;
ump = VFSTOUFS(mp);
@@ -693,12 +699,10 @@
ump->um_cred[type] = NOCRED;
UFS_UNLOCK(ump);
- vfslocked = VFS_LOCK_GIANT(qvp->v_mount);
vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY);
qvp->v_vflag &= ~VV_SYSTEM;
VOP_UNLOCK(qvp, 0);
error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
crfree(cr);
return (error);
@@ -1250,7 +1254,7 @@
struct vnode *dqvp;
struct iovec aiov;
struct uio auio;
- int vfslocked, dqvplocked, error;
+ int dqvplocked, error;
#ifdef DEBUG_VFS_LOCKS
if (vp != NULLVP)
@@ -1296,12 +1300,10 @@
error = EIO;
}
*dqp = dq;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
if (dqvplocked)
vput(dqvp);
else
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
@@ -1354,12 +1356,10 @@
DQH_UNLOCK();
tablefull("dquot");
*dqp = NODQUOT;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
if (dqvplocked)
vput(dqvp);
else
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
return (EUSERS);
}
if (dq->dq_cnt || (dq->dq_flags & DQ_MOD))
@@ -1402,7 +1402,6 @@
auio.uio_rw = UIO_READ;
auio.uio_td = (struct thread *)0;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]);
if (auio.uio_resid == recsize && error == 0) {
bzero(&dq->dq_dqb, sizeof(dq->dq_dqb));
@@ -1416,7 +1415,6 @@
vput(dqvp);
else
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
/*
* I/O error in reading quota file, release
* quota structure and reflect problem to caller.
@@ -1529,7 +1527,7 @@
struct vnode *dqvp;
struct iovec aiov;
struct uio auio;
- int vfslocked, error;
+ int error;
struct mount *mp;
struct ufsmount *ump;
@@ -1545,17 +1543,20 @@
if ((ump = dq->dq_ump) == NULL)
return (0);
UFS_LOCK(ump);
- if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP)
- panic("dqsync: file");
+ if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP) {
+ if (vp == NULL) {
+ UFS_UNLOCK(ump);
+ return (0);
+ } else
+ panic("dqsync: file");
+ }
vref(dqvp);
UFS_UNLOCK(ump);
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
DQI_LOCK(dq);
if ((dq->dq_flags & DQ_MOD) == 0) {
DQI_UNLOCK(dq);
vrele(dqvp);
- VFS_UNLOCK_GIANT(vfslocked);
return (0);
}
DQI_UNLOCK(dq);
@@ -1564,7 +1565,6 @@
if (vp != dqvp)
vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY);
- VFS_UNLOCK_GIANT(vfslocked);
DQI_LOCK(dq);
DQI_WAIT(dq, PINOD+2, "dqsync");
if ((dq->dq_flags & DQ_MOD) == 0)
@@ -1595,9 +1595,7 @@
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_WRITE;
auio.uio_td = (struct thread *)0;
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]);
- VFS_UNLOCK_GIANT(vfslocked);
if (auio.uio_resid && error == 0)
error = EIO;
@@ -1606,13 +1604,11 @@
dq->dq_flags &= ~DQ_MOD;
out:
DQI_UNLOCK(dq);
- vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
if (vp != dqvp)
vput(dqvp);
else
vrele(dqvp);
vn_finished_secondary_write(mp);
- VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
Modified: trunk/sys/ufs/ufs/ufs_vfsops.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_vfsops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_vfsops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_vfsops.c 278150 2015-02-03 11:54:33Z kib $");
#include "opt_quota.h"
#include "opt_ufs.h"
@@ -92,6 +93,9 @@
void *arg;
{
#ifndef QUOTA
+ if ((cmds >> SUBCMDSHIFT) == Q_QUOTAON)
+ vfs_unbusy(mp);
+
return (EOPNOTSUPP);
#else
struct thread *td;
@@ -112,11 +116,16 @@
break;
default:
+ if (cmd == Q_QUOTAON)
+ vfs_unbusy(mp);
return (EINVAL);
}
}
- if ((u_int)type >= MAXQUOTAS)
+ if ((u_int)type >= MAXQUOTAS) {
+ if (cmd == Q_QUOTAON)
+ vfs_unbusy(mp);
return (EINVAL);
+ }
switch (cmd) {
case Q_QUOTAON:
Modified: trunk/sys/ufs/ufs/ufs_vnops.c
===================================================================
--- trunk/sys/ufs/ufs/ufs_vnops.c 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufs_vnops.c 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993, 1995
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/ufs/ufs/ufs_vnops.c 332750 2018-04-19 02:50:15Z pfg $");
#include "opt_quota.h"
#include "opt_suiddir.h"
@@ -69,8 +70,6 @@
#include <vm/vm.h>
#include <vm/vm_extern.h>
-#include <fs/fifofs/fifo.h>
-
#include <ufs/ufs/acl.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
@@ -107,7 +106,7 @@
static vop_getattr_t ufs_getattr;
static vop_ioctl_t ufs_ioctl;
static vop_link_t ufs_link;
-static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
+static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *);
static vop_markatime_t ufs_markatime;
static vop_mkdir_t ufs_mkdir;
static vop_mknod_t ufs_mknod;
@@ -206,9 +205,11 @@
error =
ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
- ap->a_dvp, ap->a_vpp, ap->a_cnp);
- if (error)
+ ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create");
+ if (error != 0)
return (error);
+ if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
+ cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
return (0);
}
@@ -232,7 +233,7 @@
int error;
error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
- ap->a_dvp, vpp, ap->a_cnp);
+ ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod");
if (error)
return (error);
ip = VTOI(*vpp);
@@ -530,9 +531,11 @@
return (EINVAL);
}
if (vap->va_flags != VNOVAL) {
- if ((vap->va_flags & ~(UF_NODUMP | UF_IMMUTABLE | UF_APPEND |
- UF_OPAQUE | UF_NOUNLINK | SF_ARCHIVED | SF_IMMUTABLE |
- SF_APPEND | SF_NOUNLINK | SF_SNAPSHOT)) != 0)
+ if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE |
+ SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE |
+ UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK |
+ UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
+ UF_SPARSE | UF_SYSTEM)) != 0)
return (EOPNOTSUPP);
if (vp->v_mount->mnt_flag & MNT_RDONLY)
return (EROFS);
@@ -559,23 +562,17 @@
if (error)
return (error);
}
- /* Snapshot flag cannot be set or cleared */
- if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
- (ip->i_flags & SF_SNAPSHOT) == 0) ||
- ((vap->va_flags & SF_SNAPSHOT) == 0 &&
- (ip->i_flags & SF_SNAPSHOT) != 0))
+ /* The snapshot flag cannot be toggled. */
+ if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT)
return (EPERM);
- ip->i_flags = vap->va_flags;
- DIP_SET(ip, i_flags, vap->va_flags);
} else {
if (ip->i_flags &
(SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
- (vap->va_flags & UF_SETTABLE) != vap->va_flags)
+ ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
return (EPERM);
- ip->i_flags &= SF_SETTABLE;
- ip->i_flags |= (vap->va_flags & UF_SETTABLE);
- DIP_SET(ip, i_flags, ip->i_flags);
}
+ ip->i_flags = vap->va_flags;
+ DIP_SET(ip, i_flags, vap->va_flags);
ip->i_flag |= IN_CHANGE;
error = UFS_UPDATE(vp, 0);
if (ip->i_flags & (IMMUTABLE | APPEND))
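The rewritten flag checks above use an XOR idiom worth spelling out: a bit differs between the requested and current flag words exactly when it is set in their XOR, so one test replaces the old four-way comparison for SF_SNAPSHOT, and the same trick guards the SF_SETTABLE bits for unprivileged callers. A tiny demonstration (SF_SNAPSHOT value as defined in FreeBSD's sys/stat.h):

    #include <stdio.h>

    #define SF_SNAPSHOT 0x00200000  /* snapshot inode (sys/stat.h) */

    int
    main(void)
    {
            unsigned int cur = SF_SNAPSHOT; /* flag currently set */
            unsigned int req = 0;           /* caller asks to clear it */

            /* Nonzero iff the request would toggle the snapshot bit. */
            if ((req ^ cur) & SF_SNAPSHOT)
                    printf("EPERM: snapshot flag cannot be toggled\n");
            return (0);
    }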
@@ -630,8 +627,9 @@
*/
return (0);
}
- if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
- cred, td)) != 0)
+ if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL |
+ ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0),
+ cred)) != 0)
return (error);
}
if (vap->va_atime.tv_sec != VNOVAL ||
@@ -641,49 +639,17 @@
return (EROFS);
if ((ip->i_flags & SF_SNAPSHOT) != 0)
return (EPERM);
- /*
- * From utimes(2):
- * If times is NULL, ... The caller must be the owner of
- * the file, have permission to write the file, or be the
- * super-user.
- * If times is non-NULL, ... The caller must be the owner of
- * the file or be the super-user.
- *
- * Possibly for historical reasons, try to use VADMIN in
- * preference to VWRITE for a NULL timestamp. This means we
- * will return EACCES in preference to EPERM if neither
- * check succeeds.
- */
- if (vap->va_vaflags & VA_UTIMES_NULL) {
- /*
- * NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes
- *
- * "A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
- * will be allowed to set the times [..] to the current
- * server time."
- *
- * XXX: Calling it four times seems a little excessive.
- */
- error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
- if (error)
- error = VOP_ACCESS(vp, VWRITE, cred, td);
- } else
- error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
- if (error)
+ error = vn_utimes_perm(vp, vap, cred, td);
+ if (error != 0)
return (error);
- if (vap->va_atime.tv_sec != VNOVAL)
- ip->i_flag |= IN_ACCESS;
- if (vap->va_mtime.tv_sec != VNOVAL)
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
- if (vap->va_birthtime.tv_sec != VNOVAL &&
- ip->i_ump->um_fstype == UFS2)
- ip->i_flag |= IN_MODIFIED;
- ufs_itimes(vp);
+ ip->i_flag |= IN_CHANGE | IN_MODIFIED;
if (vap->va_atime.tv_sec != VNOVAL) {
+ ip->i_flag &= ~IN_ACCESS;
DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
}
if (vap->va_mtime.tv_sec != VNOVAL) {
+ ip->i_flag &= ~IN_UPDATE;
DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
}
@@ -979,6 +945,17 @@
return (error);
}
+static void
+print_bad_link_count(const char *funcname, struct vnode *dvp)
+{
+ struct inode *dip;
+
+ dip = VTOI(dvp);
+ uprintf("%s: Bad link count %d on parent inode %d in file system %s\n",
+ funcname, dip->i_effnlink, dip->i_number,
+ dvp->v_mount->mnt_stat.f_mntonname);
+}
+
/*
* link vnode call
*/
@@ -1001,13 +978,11 @@
if ((cnp->cn_flags & HASBUF) == 0)
panic("ufs_link: no name");
#endif
- if (tdvp->v_mount != vp->v_mount) {
- error = EXDEV;
+ if (VTOI(tdvp)->i_effnlink < 2) {
+ print_bad_link_count("ufs_link", tdvp);
+ error = EINVAL;
goto out;
}
- if (VTOI(tdvp)->i_effnlink < 2)
- panic("ufs_link: Bad link count %d on parent",
- VTOI(tdvp)->i_effnlink);
ip = VTOI(vp);
if ((nlink_t)ip->i_nlink >= LINK_MAX) {
error = EMLINK;
@@ -1179,11 +1154,6 @@
mp = NULL;
goto releout;
}
- error = vfs_busy(mp, 0);
- if (error) {
- mp = NULL;
- goto releout;
- }
relock:
/*
* We need to acquire 2 to 4 locks depending on whether tvp is NULL
@@ -1271,7 +1241,7 @@
error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
if (error != 0)
goto releout;
- VOP_UNLOCK(nvp, 0);
+ vput(nvp);
atomic_add_int(&rename_restarts, 1);
goto relock;
}
@@ -1512,8 +1482,8 @@
if (error)
panic("ufs_rename: from entry went away!");
if (ino != fip->i_number)
- panic("ufs_rename: ino mismatch %d != %d\n", ino,
- fip->i_number);
+ panic("ufs_rename: ino mismatch %ju != %ju\n",
+ (uintmax_t)ino, (uintmax_t)fip->i_number);
}
/*
* If the source is a directory with a
@@ -1574,18 +1544,25 @@
* are no longer needed.
*/
if (error == 0 && endoff != 0) {
+ error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC,
+ tcnp->cn_cred);
+ if (error != 0)
+ vn_printf(tdvp, "ufs_rename: failed to truncate "
+ "err %d", error);
#ifdef UFS_DIRHASH
- if (tdp->i_dirhash != NULL)
+ else if (tdp->i_dirhash != NULL)
ufsdirhash_dirtrunc(tdp, endoff);
#endif
- UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
- td);
+ /*
+ * Even if the directory compaction failed, rename was
+ * successful. Do not propagate a UFS_TRUNCATE() error
+ * to the caller.
+ */
+ error = 0;
}
if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
error = VOP_FSYNC(tdvp, MNT_WAIT, td);
vput(tdvp);
- if (mp)
- vfs_unbusy(mp);
return (error);
bad:
@@ -1603,8 +1580,6 @@
vrele(tdvp);
if (tvp)
vrele(tvp);
- if (mp)
- vfs_unbusy(mp);
return (error);
}
@@ -1751,10 +1726,10 @@
* XXX: This should not happen, as EOPNOTSUPP above was
* supposed to free acl.
*/
- printf("ufs_makeinode: VOP_GETACL() but no "
- "VOP_SETACL()\n");
- /* panic("ufs_makeinode: VOP_GETACL() but no "
- "VOP_SETACL()"); */
+ printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
+ "but no VOP_SETACL()\n");
+ /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
+ "but no VOP_SETACL()"); */
break;
default:
@@ -1832,6 +1807,11 @@
* but not have it entered in the parent directory. The entry is
* made later after writing "." and ".." entries.
*/
+ if (dp->i_effnlink < 2) {
+ print_bad_link_count("ufs_mkdir", dvp);
+ error = EINVAL;
+ goto out;
+ }
error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
if (error)
goto out;
@@ -1963,6 +1943,7 @@
dirtemplate = *dtp;
dirtemplate.dot_ino = ip->i_number;
dirtemplate.dotdot_ino = dp->i_number;
+ vnode_pager_setsize(tvp, DIRBLKSIZ);
if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
BA_CLRBUF, &bp)) != 0)
goto bad;
@@ -1969,7 +1950,6 @@
ip->i_size = DIRBLKSIZ;
DIP_SET(ip, i_size, DIRBLKSIZ);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- vnode_pager_setsize(tvp, (u_long)ip->i_size);
bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
if (DOINGSOFTDEP(tvp)) {
/*
@@ -2062,13 +2042,12 @@
* tries to remove a locally mounted on directory).
*/
error = 0;
- if (ip->i_effnlink < 2) {
+ if (dp->i_effnlink <= 2) {
+ if (dp->i_effnlink == 2)
+ print_bad_link_count("ufs_rmdir", dvp);
error = EINVAL;
goto out;
}
- if (dp->i_effnlink < 3)
- panic("ufs_dirrem: Bad link count %d on parent",
- dp->i_effnlink);
if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
error = ENOTEMPTY;
goto out;
@@ -2147,7 +2126,7 @@
int len, error;
error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
- vpp, ap->a_cnp);
+ vpp, ap->a_cnp, "ufs_symlink");
if (error)
return (error);
vp = *vpp;
@@ -2170,12 +2149,6 @@
/*
* Vnode op for reading directories.
- *
- * The routine below assumes that the on-disk format of a directory
- * is the same as that defined by <sys/dirent.h>. If the on-disk
- * format changes, then it will be necessary to do a conversion
- * from the on-disk format that read returns to the format defined
- * by <sys/dirent.h>.
*/
int
ufs_readdir(ap)
@@ -2188,103 +2161,126 @@
u_long **a_cookies;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
struct uio *uio = ap->a_uio;
+ struct buf *bp;
struct inode *ip;
+ struct direct *dp, *edp;
+ u_long *cookies;
+ struct dirent dstdp;
+ off_t offset, startoffset;
+ size_t readcnt, skipcnt;
+ ssize_t startresid;
+ int ncookies;
int error;
- size_t count, lost;
- off_t off;
- if (ap->a_ncookies != NULL)
- /*
- * Ensure that the block is aligned. The caller can use
- * the cookies to determine where in the block to start.
- */
- uio->uio_offset &= ~(DIRBLKSIZ - 1);
- ip = VTOI(ap->a_vp);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ ip = VTOI(vp);
if (ip->i_effnlink == 0)
return (0);
- off = uio->uio_offset;
- count = uio->uio_resid;
- /* Make sure we don't return partial entries. */
- if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
- return (EINVAL);
- count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
- lost = uio->uio_resid - count;
- uio->uio_resid = count;
- uio->uio_iov->iov_len = count;
-# if (BYTE_ORDER == LITTLE_ENDIAN)
- if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
- error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
- } else {
- struct dirent *dp, *edp;
- struct uio auio;
- struct iovec aiov;
- caddr_t dirbuf;
- int readcnt;
- u_char tmp;
-
- auio = *uio;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_segflg = UIO_SYSSPACE;
- aiov.iov_len = count;
- dirbuf = malloc(count, M_TEMP, M_WAITOK);
- aiov.iov_base = dirbuf;
- error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
- if (error == 0) {
- readcnt = count - auio.uio_resid;
- edp = (struct dirent *)&dirbuf[readcnt];
- for (dp = (struct dirent *)dirbuf; dp < edp; ) {
- tmp = dp->d_namlen;
- dp->d_namlen = dp->d_type;
- dp->d_type = tmp;
- if (dp->d_reclen > 0) {
- dp = (struct dirent *)
- ((char *)dp + dp->d_reclen);
- } else {
- error = EIO;
- break;
- }
- }
- if (dp >= edp)
- error = uiomove(dirbuf, readcnt, uio);
+ if (ap->a_ncookies != NULL) {
+ if (uio->uio_resid < 0)
+ ncookies = 0;
+ else
+ ncookies = uio->uio_resid;
+ if (uio->uio_offset >= ip->i_size)
+ ncookies = 0;
+ else if (ip->i_size - uio->uio_offset < ncookies)
+ ncookies = ip->i_size - uio->uio_offset;
+ ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1;
+ cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
+ *ap->a_ncookies = ncookies;
+ *ap->a_cookies = cookies;
+ } else {
+ ncookies = 0;
+ cookies = NULL;
+ }
+ offset = startoffset = uio->uio_offset;
+ startresid = uio->uio_resid;
+ error = 0;
+ while (error == 0 && uio->uio_resid > 0 &&
+ uio->uio_offset < ip->i_size) {
+ error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp);
+ if (error)
+ break;
+ if (bp->b_offset + bp->b_bcount > ip->i_size)
+ readcnt = ip->i_size - bp->b_offset;
+ else
+ readcnt = bp->b_bcount;
+ skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
+ ~(size_t)(DIRBLKSIZ - 1);
+ offset = bp->b_offset + skipcnt;
+ dp = (struct direct *)&bp->b_data[skipcnt];
+ edp = (struct direct *)&bp->b_data[readcnt];
+ while (error == 0 && uio->uio_resid > 0 && dp < edp) {
+ if (dp->d_reclen <= offsetof(struct direct, d_name) ||
+ (caddr_t)dp + dp->d_reclen > (caddr_t)edp) {
+ error = EIO;
+ break;
}
- free(dirbuf, M_TEMP);
+#if BYTE_ORDER == LITTLE_ENDIAN
+ /* Old filesystem format. */
+ if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+ dstdp.d_namlen = dp->d_type;
+ dstdp.d_type = dp->d_namlen;
+ } else
+#endif
+ {
+ dstdp.d_namlen = dp->d_namlen;
+ dstdp.d_type = dp->d_type;
+ }
+ if (offsetof(struct direct, d_name) + dstdp.d_namlen >
+ dp->d_reclen) {
+ error = EIO;
+ break;
+ }
+ if (offset < startoffset || dp->d_ino == 0)
+ goto nextentry;
+ dstdp.d_fileno = dp->d_ino;
+ dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
+ bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
+ dstdp.d_name[dstdp.d_namlen] = '\0';
+ if (dstdp.d_reclen > uio->uio_resid) {
+ if (uio->uio_resid == startresid)
+ error = EINVAL;
+ else
+ error = EJUSTRETURN;
+ break;
+ }
+ /* Advance dp. */
+ error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
+ if (error)
+ break;
+ if (cookies != NULL) {
+ KASSERT(ncookies > 0,
+ ("ufs_readdir: cookies buffer too small"));
+ *cookies = offset + dp->d_reclen;
+ cookies++;
+ ncookies--;
+ }
+nextentry:
+ offset += dp->d_reclen;
+ dp = (struct direct *)((caddr_t)dp + dp->d_reclen);
}
-# else
- error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
-# endif
- if (!error && ap->a_ncookies != NULL) {
- struct dirent* dpStart;
- struct dirent* dpEnd;
- struct dirent* dp;
- int ncookies;
- u_long *cookies;
- u_long *cookiep;
-
- if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
- panic("ufs_readdir: unexpected uio from NFS server");
- dpStart = (struct dirent *)
- ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off));
- dpEnd = (struct dirent *) uio->uio_iov->iov_base;
- for (dp = dpStart, ncookies = 0;
- dp < dpEnd;
- dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
- ncookies++;
- cookies = malloc(ncookies * sizeof(u_long), M_TEMP,
- M_WAITOK);
- for (dp = dpStart, cookiep = cookies;
- dp < dpEnd;
- dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
- off += dp->d_reclen;
- *cookiep++ = (u_long) off;
+ bqrelse(bp);
+ uio->uio_offset = offset;
+ }
+ /* We need to correct uio_offset. */
+ uio->uio_offset = offset;
+ if (error == EJUSTRETURN)
+ error = 0;
+ if (ap->a_ncookies != NULL) {
+ if (error == 0) {
+ ap->a_ncookies -= ncookies;
+ } else {
+ free(*ap->a_cookies, M_TEMP);
+ *ap->a_ncookies = 0;
+ *ap->a_cookies = NULL;
}
- *ap->a_ncookies = ncookies;
- *ap->a_cookies = cookies;
}
- uio->uio_resid += lost;
- if (ap->a_eofflag)
- *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
+ if (error == 0 && ap->a_eofflag)
+ *ap->a_eofflag = ip->i_size <= uio->uio_offset;
return (error);
}
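The ufs_readdir() rewrite above replaces the old VOP_READ()-plus-byteswap scheme with direct buffer-cache reads: each on-disk struct direct is bounds-checked, repacked into a struct dirent, copied out with uiomove(), and a cookie recorded per entry. The heart of it is the per-entry validation and conversion, sketched here in user space with simplified stand-in structs (fixed-size names; the kernel sizes the output record with GENERIC_DIRSIZ()):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Simplified stand-ins for the kernel's struct direct / dirent. */
    struct direct_sim {
            uint32_t d_ino;
            uint16_t d_reclen;
            uint8_t  d_type;
            uint8_t  d_namlen;
            char     d_name[64];
    };

    struct dirent_sim {
            uint32_t d_fileno;
            uint8_t  d_type;
            uint8_t  d_namlen;
            char     d_name[64];
    };

    /* Validate and repack one entry; -1 maps to the kernel's EIO. */
    static int
    convert_entry(const struct direct_sim *dp, size_t room,
        struct dirent_sim *out)
    {
            if (dp->d_reclen <= offsetof(struct direct_sim, d_name) ||
                dp->d_reclen > room)
                    return (-1);    /* record overruns the block */
            if (offsetof(struct direct_sim, d_name) + dp->d_namlen >
                dp->d_reclen)
                    return (-1);    /* name overruns the record */
            out->d_fileno = dp->d_ino;
            out->d_type = dp->d_type;
            out->d_namlen = dp->d_namlen;
            memcpy(out->d_name, dp->d_name, dp->d_namlen);
            out->d_name[dp->d_namlen] = '\0';
            return (0);
    }

    int
    main(void)
    {
            struct direct_sim in = { 2, sizeof(in), 4 /* DT_DIR */, 1, "." };
            struct dirent_sim out;

            return (convert_entry(&in, sizeof(in), &out));
    }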
@@ -2589,11 +2585,12 @@
* Vnode dvp must be locked.
*/
static int
-ufs_makeinode(mode, dvp, vpp, cnp)
+ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
int mode;
struct vnode *dvp;
struct vnode **vpp;
struct componentname *cnp;
+ const char *callfunc;
{
struct inode *ip, *pdir;
struct direct newdir;
@@ -2603,15 +2600,16 @@
pdir = VTOI(dvp);
#ifdef INVARIANTS
if ((cnp->cn_flags & HASBUF) == 0)
- panic("ufs_makeinode: no name");
+ panic("%s: no name", callfunc);
#endif
*vpp = NULL;
if ((mode & IFMT) == 0)
mode |= IFREG;
- if (VTOI(dvp)->i_effnlink < 2)
- panic("ufs_makeinode: Bad link count %d on parent",
- VTOI(dvp)->i_effnlink);
+ if (pdir->i_effnlink < 2) {
+ print_bad_link_count(callfunc, dvp);
+ return (EINVAL);
+ }
error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
if (error)
return (error);
Modified: trunk/sys/ufs/ufs/ufsmount.h
===================================================================
--- trunk/sys/ufs/ufs/ufsmount.h 2018-05-24 22:27:41 UTC (rev 9896)
+++ trunk/sys/ufs/ufs/ufsmount.h 2018-05-24 22:29:08 UTC (rev 9897)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -27,11 +28,11 @@
* SUCH DAMAGE.
*
* @(#)ufsmount.h 8.6 (Berkeley) 3/30/95
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/ufs/ufs/ufsmount.h 297787 2016-04-10 16:32:21Z kib $
*/
#ifndef _UFS_UFS_UFSMOUNT_H_
-#define _UFS_UFS_UFSMOUNT_H_
+#define _UFS_UFS_UFSMOUNT_H_
#include <sys/buf.h> /* XXX For struct workhead. */
@@ -52,6 +53,7 @@
struct buf;
struct inode;
struct nameidata;
+struct taskqueue;
struct timeval;
struct ucred;
struct uio;
@@ -78,19 +80,7 @@
u_long um_seqinc; /* inc between seq blocks */
struct mtx um_lock; /* Protects ufsmount & fs */
pid_t um_fsckpid; /* PID permitted fsck sysctls */
- long um_numindirdeps; /* outstanding indirdeps */
- struct workhead softdep_workitem_pending; /* softdep work queue */
- struct worklist *softdep_worklist_tail; /* Tail pointer for above */
- struct workhead softdep_journal_pending; /* journal work queue */
- struct worklist *softdep_journal_tail; /* Tail pointer for above */
- struct jblocks *softdep_jblocks; /* Journal block information */
- struct inodedeplst softdep_unlinked; /* Unlinked inodes */
- struct bmsafemaphd softdep_dirtycg; /* Dirty CGs */
- int softdep_on_journal; /* Items on the journal list */
- int softdep_on_worklist; /* Items on the worklist */
- int softdep_deps; /* Total dependency count */
- int softdep_accdeps; /* accumulated dep count */
- int softdep_req; /* Wakeup when deps hits 0. */
+ struct mount_softdeps *um_softdep; /* softdep mgmt structure */
struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
time_t um_btime[MAXQUOTAS]; /* block quota time limit */
@@ -99,11 +89,15 @@
int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */
int um_candelete; /* devvp supports TRIM */
int um_writesuspended; /* suspension in progress */
- int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **);
+ u_int um_trim_inflight;
+ struct taskqueue *um_trim_tq;
+ int (*um_balloc)(struct vnode *, off_t, int, struct ucred *,
+ int, struct buf **);
int (*um_blkatoff)(struct vnode *, off_t, char **, struct buf **);
- int (*um_truncate)(struct vnode *, off_t, int, struct ucred *, struct thread *);
+ int (*um_truncate)(struct vnode *, off_t, int, struct ucred *);
int (*um_update)(struct vnode *, int);
- int (*um_valloc)(struct vnode *, int, struct ucred *, struct vnode **);
+ int (*um_valloc)(struct vnode *, int, struct ucred *,
+ struct vnode **);
int (*um_vfree)(struct vnode *, ino_t, int);
void (*um_ifree)(struct ufsmount *, struct inode *);
int (*um_rdonly)(struct inode *);
@@ -110,13 +104,13 @@
void (*um_snapgone)(struct inode *);
};
-#define UFS_BALLOC(aa, bb, cc, dd, ee, ff) VFSTOUFS((aa)->v_mount)->um_balloc(aa, bb, cc, dd, ee, ff)
-#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
-#define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee)
-#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
-#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
-#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
-#define UFS_IFREE(aa, bb) ((aa)->um_ifree(aa, bb))
+#define UFS_BALLOC(aa, bb, cc, dd, ee, ff) VFSTOUFS((aa)->v_mount)->um_balloc(aa, bb, cc, dd, ee, ff)
+#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
+#define UFS_TRUNCATE(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd)
+#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
+#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
+#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
+#define UFS_IFREE(aa, bb) ((aa)->um_ifree(aa, bb))
#define UFS_RDONLY(aa) ((aa)->i_ump->um_rdonly(aa))
#define UFS_SNAPGONE(aa) ((aa)->i_ump->um_snapgone(aa))
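For orientation, the UFS_* macros being reindented here are thin dispatchers through per-mount function pointers in struct ufsmount; that indirection is exactly why dropping the struct thread * argument from um_truncate surfaces as a UFS_TRUNCATE() signature change at every call site. A reduced sketch of the pattern (simulated names, not the kernel's):

    #include <stdio.h>

    /* Cut-down per-mount ops table in the spirit of struct ufsmount. */
    struct mountops {
            int (*um_truncate)(long long length, int flags);
    };

    static int
    ffs_truncate_sim(long long length, int flags)
    {
            printf("truncate to %lld, flags 0x%x\n", length, flags);
            return (0);
    }

    /* Dispatch macro, as UFS_TRUNCATE() does via VFSTOUFS(). */
    #define UFS_TRUNCATE_SIM(ump, len, fl) ((ump)->um_truncate((len), (fl)))

    int
    main(void)
    {
            struct mountops m = { .um_truncate = ffs_truncate_sim };

            return (UFS_TRUNCATE_SIM(&m, 0, 0));
    }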
@@ -127,8 +121,8 @@
/*
* Filesystem types
*/
-#define UFS1 1
-#define UFS2 2
+#define UFS1 1
+#define UFS2 2
/*
* Flags describing the state of quotas.
@@ -135,10 +129,10 @@
*/
#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */
#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */
-#define QTF_64BIT 0x04 /* 64-bit quota file */
+#define QTF_64BIT 0x04 /* 64-bit quota file */
/* Convert mount ptr to ufsmount ptr. */
-#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
+#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
#define UFSTOVFS(ump) (ump)->um_mountp
/*
@@ -145,7 +139,7 @@
* Macros to access filesystem parameters in the ufsmount structure.
* Used by ufs_bmap.
*/
-#define MNINDIR(ump) ((ump)->um_nindir)
+#define MNINDIR(ump) ((ump)->um_nindir)
#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb)
#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc)
#endif /* _KERNEL */