[Midnightbsd-cvs] src [9424] trunk/sys: - Fix nullfs vnode reference leak in nullfs_reclaim_lowervp().
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Sat Mar 4 17:54:00 EST 2017
Revision: 9424
http://svnweb.midnightbsd.org/src/?rev=9424
Author: laffer1
Date: 2017-03-04 17:54:00 -0500 (Sat, 04 Mar 2017)
Log Message:
-----------
- Fix nullfs vnode reference leak in nullfs_reclaim_lowervp(). The
null_hashget() obtains the reference on the nullfs vnode, which must
be dropped.
- Fix a wart which existed from the introduction of the nullfs
caching, do not unlock lower vnode in the nullfs_reclaim_lowervp().
It should be innocent, but now it is also formally safe. Inform the
nullfs_reclaim() about this using the NULLV_NOUNLOCK flag set on
nullfs inode.
- Add a callback to the upper filesystems for the lower vnode
unlinking. When inactivating a nullfs vnode, check if the lower
vnode was unlinked, indicated by nullfs flag NULLV_DROP or VV_NOSYNC
on the lower vnode, and reclaim upper vnode if so. This allows
nullfs to purge cached vnodes for the unlinked lower vnode, avoiding
excessive caching.
Modified Paths:
--------------
trunk/sys/fs/nullfs/null.h
trunk/sys/fs/nullfs/null_subr.c
trunk/sys/fs/nullfs/null_vfsops.c
trunk/sys/fs/nullfs/null_vnops.c
trunk/sys/kern/vfs_subr.c
trunk/sys/kern/vfs_syscalls.c
trunk/sys/sys/mount.h
Modified: trunk/sys/fs/nullfs/null.h
===================================================================
--- trunk/sys/fs/nullfs/null.h 2017-03-04 22:53:30 UTC (rev 9423)
+++ trunk/sys/fs/nullfs/null.h 2017-03-04 22:54:00 UTC (rev 9424)
@@ -53,8 +53,12 @@
LIST_ENTRY(null_node) null_hash; /* Hash list */
struct vnode *null_lowervp; /* VREFed once */
struct vnode *null_vnode; /* Back pointer */
+ u_int null_flags;
};
+#define NULLV_NOUNLOCK 0x0001
+#define NULLV_DROP 0x0002
+
#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
#define NULLTOV(xp) ((xp)->null_vnode)
Modified: trunk/sys/fs/nullfs/null_subr.c
===================================================================
--- trunk/sys/fs/nullfs/null_subr.c 2017-03-04 22:53:30 UTC (rev 9423)
+++ trunk/sys/fs/nullfs/null_subr.c 2017-03-04 22:54:00 UTC (rev 9424)
@@ -251,6 +251,7 @@
xp->null_vnode = vp;
xp->null_lowervp = lowervp;
+ xp->null_flags = 0;
vp->v_type = lowervp->v_type;
vp->v_data = xp;
vp->v_vnlock = lowervp->v_vnlock;
Modified: trunk/sys/fs/nullfs/null_vfsops.c
===================================================================
--- trunk/sys/fs/nullfs/null_vfsops.c 2017-03-04 22:53:30 UTC (rev 9423)
+++ trunk/sys/fs/nullfs/null_vfsops.c 2017-03-04 22:54:00 UTC (rev 9424)
@@ -65,7 +65,6 @@
static vfs_unmount_t nullfs_unmount;
static vfs_vget_t nullfs_vget;
static vfs_extattrctl_t nullfs_extattrctl;
-static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp;
/*
* Mount null layer
@@ -390,10 +389,51 @@
vp = null_hashget(mp, lowervp);
if (vp == NULL)
return;
+ VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
vgone(vp);
- vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
+ vput(vp);
}
+static void
+nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
+{
+ struct vnode *vp;
+ struct null_node *xp;
+
+ vp = null_hashget(mp, lowervp);
+ if (vp == NULL)
+ return;
+ xp = VTONULL(vp);
+ xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
+ vhold(vp);
+ vunref(vp);
+
+ if (vp->v_usecount == 0) {
+ /*
+ * If vunref() dropped the last use reference on the
+ * nullfs vnode, it must be reclaimed, and its lock
+ * was split from the lower vnode lock. Need to do
+ * extra unlock before allowing the final vdrop() to
+ * free the vnode.
+ */
+ KASSERT((vp->v_iflag & VI_DOOMED) != 0,
+ ("not reclaimed nullfs vnode %p", vp));
+ VOP_UNLOCK(vp, 0);
+ } else {
+ /*
+ * Otherwise, the nullfs vnode still shares the lock
+ * with the lower vnode, and must not be unlocked.
+ * Also clear the NULLV_NOUNLOCK, the flag is not
+ * relevant for future reclamations.
+ */
+ ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
+ KASSERT((vp->v_iflag & VI_DOOMED) == 0,
+ ("reclaimed nullfs vnode %p", vp));
+ xp->null_flags &= ~NULLV_NOUNLOCK;
+ }
+ vdrop(vp);
+}
+
static struct vfsops null_vfsops = {
.vfs_extattrctl = nullfs_extattrctl,
.vfs_fhtovp = nullfs_fhtovp,
@@ -407,6 +447,7 @@
.vfs_unmount = nullfs_unmount,
.vfs_vget = nullfs_vget,
.vfs_reclaim_lowervp = nullfs_reclaim_lowervp,
+ .vfs_unlink_lowervp = nullfs_unlink_lowervp,
};
VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);
Modified: trunk/sys/fs/nullfs/null_vnops.c
===================================================================
--- trunk/sys/fs/nullfs/null_vnops.c 2017-03-04 22:53:30 UTC (rev 9423)
+++ trunk/sys/fs/nullfs/null_vnops.c 2017-03-04 22:54:00 UTC (rev 9424)
@@ -692,18 +692,24 @@
static int
null_inactive(struct vop_inactive_args *ap __unused)
{
- struct vnode *vp;
+ struct vnode *vp, *lvp;
+ struct null_node *xp;
struct mount *mp;
struct null_mount *xmp;
vp = ap->a_vp;
+ xp = VTONULL(vp);
+ lvp = NULLVPTOLOWERVP(vp);
mp = vp->v_mount;
xmp = MOUNTTONULLMOUNT(mp);
- if ((xmp->nullm_flags & NULLM_CACHE) == 0) {
+ if ((xmp->nullm_flags & NULLM_CACHE) == 0 ||
+ (xp->null_flags & NULLV_DROP) != 0 ||
+ (lvp->v_vflag & VV_NOSYNC) != 0) {
/*
* If this is the last reference and caching of the
- * nullfs vnodes is not enabled, then free up the
- * vnode so as not to tie up the lower vnodes.
+ * nullfs vnodes is not enabled, or the lower vnode is
+ * deleted, then free up the vnode so as not to tie up
+ * the lower vnodes.
*/
vp->v_object = NULL;
vrecycle(vp, curthread);
@@ -748,7 +754,10 @@
*/
if (vp->v_writecount > 0)
VOP_ADD_WRITECOUNT(lowervp, -1);
- vput(lowervp);
+ if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
+ vunref(lowervp);
+ else
+ vput(lowervp);
free(xp, M_NULLFSNODE);
return (0);
Modified: trunk/sys/kern/vfs_subr.c
===================================================================
--- trunk/sys/kern/vfs_subr.c 2017-03-04 22:53:30 UTC (rev 9423)
+++ trunk/sys/kern/vfs_subr.c 2017-03-04 22:54:00 UTC (rev 9424)
@@ -2756,19 +2756,20 @@
}
static void
-vgonel_reclaim_lowervp_vfs(struct mount *mp __unused,
+notify_lowervp_vfs_dummy(struct mount *mp __unused,
struct vnode *lowervp __unused)
{
}
/*
- * Notify upper mounts about reclaimed vnode.
+ * Notify upper mounts about reclaimed or unlinked vnode.
*/
-static void
-vgonel_reclaim_lowervp(struct vnode *vp)
+void
+vfs_notify_upper(struct vnode *vp, int event)
{
static struct vfsops vgonel_vfsops = {
- .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs
+ .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy,
+ .vfs_unlink_lowervp = notify_lowervp_vfs_dummy,
};
struct mount *mp, *ump, *mmp;
@@ -2792,7 +2793,17 @@
}
TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
MNT_IUNLOCK(mp);
- VFS_RECLAIM_LOWERVP(ump, vp);
+ switch (event) {
+ case VFS_NOTIFY_UPPER_RECLAIM:
+ VFS_RECLAIM_LOWERVP(ump, vp);
+ break;
+ case VFS_NOTIFY_UPPER_UNLINK:
+ VFS_UNLINK_LOWERVP(ump, vp);
+ break;
+ default:
+ KASSERT(0, ("invalid event %d", event));
+ break;
+ }
MNT_ILOCK(mp);
ump = TAILQ_NEXT(mmp, mnt_upper_link);
TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
@@ -2839,7 +2850,7 @@
active = vp->v_usecount;
oweinact = (vp->v_iflag & VI_OWEINACT);
VI_UNLOCK(vp);
- vgonel_reclaim_lowervp(vp);
+ vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
/*
* Clean out any buffers associated with the vnode.
Modified: trunk/sys/kern/vfs_syscalls.c
===================================================================
--- trunk/sys/kern/vfs_syscalls.c 2017-03-04 22:53:30 UTC (rev 9423)
+++ trunk/sys/kern/vfs_syscalls.c 2017-03-04 22:54:00 UTC (rev 9424)
@@ -1949,6 +1949,7 @@
if (error)
goto out;
#endif
+ vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
@@ -3948,6 +3949,7 @@
return (error);
goto restart;
}
+ vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
vn_finished_write(mp);
out:
Modified: trunk/sys/sys/mount.h
===================================================================
--- trunk/sys/sys/mount.h 2017-03-04 22:53:30 UTC (rev 9423)
+++ trunk/sys/sys/mount.h 2017-03-04 22:54:00 UTC (rev 9424)
@@ -630,7 +630,7 @@
typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
struct sysctl_req *req);
typedef void vfs_susp_clean_t(struct mount *mp);
-typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp);
+typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
struct vfsops {
vfs_mount_t *vfs_mount;
@@ -648,7 +648,8 @@
vfs_extattrctl_t *vfs_extattrctl;
vfs_sysctl_t *vfs_sysctl;
vfs_susp_clean_t *vfs_susp_clean;
- vfs_reclaim_lowervp_t *vfs_reclaim_lowervp;
+ vfs_notify_lowervp_t *vfs_reclaim_lowervp;
+ vfs_notify_lowervp_t *vfs_unlink_lowervp;
};
vfs_statfs_t __vfs_statfs;
@@ -713,6 +714,12 @@
mtx_assert(&Giant, MA_OWNED); \
} while (0)
+#define VFS_UNLINK_LOWERVP(MP, VP) do { \
+ if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \
+ (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \
+ } \
+} while (0)
+
#define VFS_KNOTE_LOCKED(vp, hint) do \
{ \
if (((vp)->v_vflag & VV_NOKNOTE) == 0) \
@@ -725,6 +732,9 @@
VN_KNOTE((vp), (hint), 0); \
} while (0)
+#define VFS_NOTIFY_UPPER_RECLAIM 1
+#define VFS_NOTIFY_UPPER_UNLINK 2
+
#include <sys/module.h>
/*
@@ -803,6 +813,7 @@
void vfs_mount_error(struct mount *, const char *, ...);
void vfs_mountroot(void); /* mount our root filesystem */
void vfs_mountedfrom(struct mount *, const char *from);
+void vfs_notify_upper(struct vnode *, int);
void vfs_oexport_conv(const struct oexport_args *oexp,
struct export_args *exp);
void vfs_ref(struct mount *);
More information about the Midnightbsd-cvs
mailing list