[Midnightbsd-cvs] src: fs/udf: sync with FreeBSD

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Fri Sep 19 20:44:27 EDT 2008


Log Message:
-----------
sync with FreeBSD

Modified Files:
--------------
    src/sys/fs/deadfs:
        dead_vnops.c (r1.2 -> r1.3)
    src/sys/fs/devfs:
        devfs.h (r1.1.1.1 -> r1.2)
        devfs_devs.c (r1.2 -> r1.3)
        devfs_int.h (r1.1.1.1 -> r1.2)
        devfs_rule.c (r1.1.1.2 -> r1.2)
        devfs_vfsops.c (r1.1.1.1 -> r1.2)
        devfs_vnops.c (r1.1.1.1 -> r1.2)
    src/sys/fs/fdescfs:
        fdesc_vfsops.c (r1.1.1.1 -> r1.2)
        fdesc_vnops.c (r1.2 -> r1.3)
    src/sys/fs/fifofs:
        fifo_vnops.c (r1.4 -> r1.5)
    src/sys/fs/msdosfs:
        bootsect.h (r1.2 -> r1.3)
        bpb.h (r1.2 -> r1.3)
        denode.h (r1.2 -> r1.3)
        direntry.h (r1.2 -> r1.3)
        fat.h (r1.2 -> r1.3)
        msdosfs_conv.c (r1.2 -> r1.3)
        msdosfs_denode.c (r1.3 -> r1.4)
        msdosfs_fat.c (r1.2 -> r1.3)
        msdosfs_fileno.c (r1.2 -> r1.3)
        msdosfs_iconv.c (r1.2 -> r1.3)
        msdosfs_lookup.c (r1.2 -> r1.3)
        msdosfs_vfsops.c (r1.4 -> r1.5)
        msdosfs_vnops.c (r1.7 -> r1.8)
        msdosfsmount.h (r1.2 -> r1.3)
    src/sys/fs/ntfs:
        ntfs.h (r1.1.1.1 -> r1.2)
        ntfs_ihash.c (r1.1.1.1 -> r1.2)
        ntfs_subr.c (r1.1.1.2 -> r1.2)
        ntfs_subr.h (r1.1.1.1 -> r1.2)
        ntfs_vfsops.c (r1.1.1.2 -> r1.2)
        ntfs_vnops.c (r1.2 -> r1.3)
    src/sys/fs/nullfs:
        null_subr.c (r1.2 -> r1.3)
        null_vfsops.c (r1.2 -> r1.3)
        null_vnops.c (r1.2 -> r1.3)
    src/sys/fs/nwfs:
        nwfs_io.c (r1.1.1.1 -> r1.2)
        nwfs_node.c (r1.2 -> r1.3)
        nwfs_subr.c (r1.1.1.1 -> r1.2)
        nwfs_vfsops.c (r1.1.1.2 -> r1.2)
        nwfs_vnops.c (r1.1.1.1 -> r1.2)
    src/sys/fs/portalfs:
        portal_vfsops.c (r1.1.1.1 -> r1.2)
        portal_vnops.c (r1.1.1.1 -> r1.2)
    src/sys/fs/procfs:
        procfs.c (r1.1.1.1 -> r1.2)
        procfs_ctl.c (r1.2 -> r1.3)
        procfs_dbregs.c (r1.1.1.2 -> r1.2)
        procfs_fpregs.c (r1.1.1.2 -> r1.2)
        procfs_ioctl.c (r1.1.1.1 -> r1.2)
        procfs_map.c (r1.1.1.1 -> r1.2)
        procfs_regs.c (r1.1.1.2 -> r1.2)
        procfs_status.c (r1.1.1.2 -> r1.2)
    src/sys/fs/pseudofs:
        pseudofs.c (r1.1.1.1 -> r1.2)
        pseudofs.h (r1.1.1.1 -> r1.2)
        pseudofs_fileno.c (r1.1.1.1 -> r1.2)
        pseudofs_internal.h (r1.1.1.1 -> r1.2)
        pseudofs_vncache.c (r1.1.1.1 -> r1.2)
        pseudofs_vnops.c (r1.2 -> r1.3)
    src/sys/fs/smbfs:
        smbfs_io.c (r1.1.1.1 -> r1.2)
        smbfs_node.c (r1.2 -> r1.3)
        smbfs_smb.c (r1.2 -> r1.3)
        smbfs_subr.c (r1.1.1.1 -> r1.2)
        smbfs_vfsops.c (r1.1.1.2 -> r1.2)
        smbfs_vnops.c (r1.2 -> r1.3)
    src/sys/fs/udf:
        ecma167-udf.h (r1.1.1.1 -> r1.2)
        osta.h (r1.1.1.1 -> r1.2)
        udf.h (r1.1.1.1 -> r1.2)
        udf_vfsops.c (r1.2 -> r1.3)
        udf_vnops.c (r1.2 -> r1.3)
    src/sys/fs/unionfs:
        union.h (r1.1.1.1 -> r1.2)
        union_subr.c (r1.1.1.1 -> r1.2)
        union_vfsops.c (r1.1.1.1 -> r1.2)
        union_vnops.c (r1.1.1.1 -> r1.2)

Added Files:
-----------
    src/sys/fs/cd9660:
        TODO (r1.1)
        TODO.hibler (r1.1)
        cd9660_bmap.c (r1.1)
        cd9660_iconv.c (r1.1)
        cd9660_lookup.c (r1.1)
        cd9660_mount.h (r1.1)
        cd9660_node.c (r1.1)
        cd9660_node.h (r1.1)
        cd9660_rrip.c (r1.1)
        cd9660_rrip.h (r1.1)
        cd9660_util.c (r1.1)
        cd9660_vfsops.c (r1.1)
        cd9660_vnops.c (r1.1)
        iso.h (r1.1)
        iso_rrip.h (r1.1)
    src/sys/fs/coda:
        README (r1.1)
        TODO (r1.1)
        cnode.h (r1.1)
        coda.h (r1.1)
        coda_fbsd.c (r1.1)
        coda_io.h (r1.1)
        coda_kernel.h (r1.1)
        coda_namecache.c (r1.1)
        coda_namecache.h (r1.1)
        coda_opstats.h (r1.1)
        coda_pioctl.h (r1.1)
        coda_psdev.c (r1.1)
        coda_psdev.h (r1.1)
        coda_subr.c (r1.1)
        coda_subr.h (r1.1)
        coda_venus.c (r1.1)
        coda_venus.h (r1.1)
        coda_vfsops.c (r1.1)
        coda_vfsops.h (r1.1)
        coda_vnops.c (r1.1)
        coda_vnops.h (r1.1)
    src/sys/fs/tmpfs:
        tmpfs.h (r1.1)
        tmpfs_fifoops.c (r1.1)
        tmpfs_fifoops.h (r1.1)
        tmpfs_subr.c (r1.1)
        tmpfs_vfsops.c (r1.1)
        tmpfs_vnops.c (r1.1)
        tmpfs_vnops.h (r1.1)

-------------- next part --------------
Index: devfs_rule.c
===================================================================
RCS file: /home/cvs/src/sys/fs/devfs/devfs_rule.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/devfs/devfs_rule.c -L sys/fs/devfs/devfs_rule.c -u -r1.1.1.2 -r1.2
--- sys/fs/devfs/devfs_rule.c
+++ sys/fs/devfs/devfs_rule.c
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/devfs/devfs_rule.c,v 1.14.2.3 2006/01/11 10:15:57 rwatson Exp $
+ * $FreeBSD: src/sys/fs/devfs/devfs_rule.c,v 1.23 2006/11/06 13:41:56 rwatson Exp $
  */
 
 /*
@@ -62,13 +62,12 @@
  * know the convention).
  */
 
-#include "opt_devfs.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
+#include <sys/priv.h>
 #include <sys/dirent.h>
 #include <sys/ioccom.h>
 #include <sys/lock.h>
@@ -166,11 +165,13 @@
 	sx_assert(&dm->dm_lock, SX_XLOCKED);
 
 	/*
-	 * XXX: This returns an error regardless of whether we
-	 * actually support the cmd or not.
+	 * XXX: This returns an error regardless of whether we actually
+	 * support the cmd or not.
+	 *
+	 * We could make this privilege finer grained if desired.
 	 */
-	error = suser(td);
-	if (error != 0)
+	error = priv_check(td, PRIV_DEVFS_RULE);
+	if (error)
 		return (error);
 
 	sx_xlock(&sx_rules);
Index: devfs_int.h
===================================================================
RCS file: /home/cvs/src/sys/fs/devfs/devfs_int.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/devfs/devfs_int.h -L sys/fs/devfs/devfs_int.h -u -r1.1.1.1 -r1.2
--- sys/fs/devfs/devfs_int.h
+++ sys/fs/devfs/devfs_int.h
@@ -22,7 +22,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/devfs/devfs_int.h,v 1.1.2.2 2005/09/26 14:36:52 phk Exp $
+ * $FreeBSD: src/sys/fs/devfs/devfs_int.h,v 1.4 2007/07/03 17:42:36 kib Exp $
  */
 
 /*
@@ -47,11 +47,16 @@
 
 	u_int			cdp_flags;
 #define CDP_ACTIVE		(1 << 0)
+#define CDP_SCHED_DTR		(1 << 1)
 
 	u_int			cdp_inuse;
 	u_int			cdp_maxdirent;
 	struct devfs_dirent	**cdp_dirents;
 	struct devfs_dirent	*cdp_dirent0;
+
+	TAILQ_ENTRY(cdev_priv)	cdp_dtr_list;
+	void			(*cdp_dtr_cb)(void *);
+	void			*cdp_dtr_cb_arg;
 };
 
 struct cdev *devfs_alloc(void);
@@ -61,6 +66,9 @@
 
 extern struct unrhdr *devfs_inos;
 extern struct mtx devmtx;
+extern struct mtx devfs_de_interlock;
+extern struct sx clone_drain_lock;
+extern TAILQ_HEAD(cdev_priv_list, cdev_priv) cdevp_list;
 
 #endif /* _KERNEL */
 
Index: devfs_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/devfs/devfs_vfsops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/devfs/devfs_vfsops.c -L sys/fs/devfs/devfs_vfsops.c -u -r1.1.1.1 -r1.2
--- sys/fs/devfs/devfs_vfsops.c
+++ sys/fs/devfs/devfs_vfsops.c
@@ -31,17 +31,13 @@
  *	@(#)kernfs_vfsops.c	8.10 (Berkeley) 5/14/95
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36
  *
- * $FreeBSD: src/sys/fs/devfs/devfs_vfsops.c,v 1.44.2.3 2005/09/26 14:36:52 phk Exp $
+ * $FreeBSD: src/sys/fs/devfs/devfs_vfsops.c,v 1.52 2006/09/26 04:12:45 tegge Exp $
  */
 
-#include "opt_devfs.h"
-#include "opt_mac.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
@@ -81,12 +77,15 @@
 	fmp = malloc(sizeof *fmp, M_DEVFS, M_WAITOK | M_ZERO);
 	fmp->dm_idx = alloc_unr(devfs_unr);
 	sx_init(&fmp->dm_lock, "devfsmount");
+	fmp->dm_holdcnt = 1;
 
+	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
 	mp->mnt_kern_flag |= MNTK_MPSAFE;
 #ifdef MAC
 	mp->mnt_flag |= MNT_MULTILABEL;
 #endif
+	MNT_IUNLOCK(mp);
 	fmp->dm_mount = mp;
 	mp->mnt_data = (void *) fmp;
 	vfs_getnewfsid(mp);
@@ -108,14 +107,25 @@
 	return (0);
 }
 
+void
+devfs_unmount_final(struct devfs_mount *fmp)
+{
+	sx_destroy(&fmp->dm_lock);
+	free(fmp, M_DEVFS);
+}
+
 static int
 devfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 {
 	int error;
 	int flags = 0;
 	struct devfs_mount *fmp;
+	int hold;
+	u_int idx;
 
 	fmp = VFSTODEVFS(mp);
+	KASSERT(fmp->dm_mount != NULL,
+		("devfs_unmount unmounted devfs_mount"));
 	/* There is 1 extra root vnode reference from devfs_mount(). */
 	error = vflush(mp, 1, flags, td);
 	if (error)
@@ -123,11 +133,14 @@
 	sx_xlock(&fmp->dm_lock);
 	devfs_cleanup(fmp);
 	devfs_rules_cleanup(fmp);
-	sx_xunlock(&fmp->dm_lock);
+	fmp->dm_mount = NULL;
+	hold = --fmp->dm_holdcnt;
 	mp->mnt_data = NULL;
-	sx_destroy(&fmp->dm_lock);
-	free_unr(devfs_unr, fmp->dm_idx);
-	free(fmp, M_DEVFS);
+	idx = fmp->dm_idx;
+	sx_xunlock(&fmp->dm_lock);
+	free_unr(devfs_unr, idx);
+	if (hold == 0)
+		devfs_unmount_final(fmp);
 	return 0;
 }
 
@@ -141,6 +154,7 @@
 	struct devfs_mount *dmp;
 
 	dmp = VFSTODEVFS(mp);
+	sx_xlock(&dmp->dm_lock);
 	error = devfs_allocv(dmp->dm_rootdir, mp, &vp, td);
 	if (error)
 		return (error);
Index: devfs.h
===================================================================
RCS file: /home/cvs/src/sys/fs/devfs/devfs.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/devfs/devfs.h -L sys/fs/devfs/devfs.h -u -r1.1.1.1 -r1.2
--- sys/fs/devfs/devfs.h
+++ sys/fs/devfs/devfs.h
@@ -33,7 +33,7 @@
  *	@(#)kernfs.h	8.6 (Berkeley) 3/29/95
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs.h 1.14
  *
- * $FreeBSD: src/sys/fs/devfs/devfs.h,v 1.22.2.2 2005/09/26 14:36:52 phk Exp $
+ * $FreeBSD: src/sys/fs/devfs/devfs.h,v 1.31 2006/10/18 11:17:14 kib Exp $
  */
 
 #ifndef _FS_DEVFS_DEVFS_H_
@@ -129,6 +129,8 @@
 #define	DE_WHITEOUT	0x1
 #define	DE_DOT		0x2
 #define	DE_DOTDOT	0x4
+#define DE_DOOMED	0x8
+	int			de_holdcnt;
 	struct dirent 		*de_dirent;
 	TAILQ_ENTRY(devfs_dirent) de_list;
 	TAILQ_HEAD(, devfs_dirent) de_dlist;
@@ -150,6 +152,7 @@
 	struct mount		*dm_mount;
 	struct devfs_dirent	*dm_rootdir;
 	unsigned		dm_generation;
+	int			dm_holdcnt;
 	struct sx		dm_lock;
 	devfs_rsnum		dm_ruleset;
 };
@@ -160,15 +163,21 @@
 
 #define VFSTODEVFS(mp)	((struct devfs_mount *)((mp)->mnt_data))
 
+#define DEVFS_DE_HOLD(de)	((de)->de_holdcnt++)
+#define DEVFS_DE_DROP(de)	(--(de)->de_holdcnt == 0)
+
+#define DEVFS_DMP_HOLD(dmp)	((dmp)->dm_holdcnt++)
+#define DEVFS_DMP_DROP(dmp)	(--(dmp)->dm_holdcnt == 0)
+
 void devfs_rules_apply(struct devfs_mount *dm, struct devfs_dirent *de);
 void devfs_rules_cleanup (struct devfs_mount *dm);
 int devfs_rules_ioctl(struct devfs_mount *dm, u_long cmd, caddr_t data, struct thread *td);
 int devfs_allocv (struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td);
-struct cdev **devfs_itod (int inode);
-struct devfs_dirent **devfs_itode (struct devfs_mount *dm, int inode);
-void devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de);
+void devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de, int vp_locked);
+void devfs_dirent_free(struct devfs_dirent *de);
 void devfs_populate (struct devfs_mount *dm);
 void devfs_cleanup (struct devfs_mount *dm);
+void devfs_unmount_final(struct devfs_mount *mp);
 struct devfs_dirent *devfs_newdirent (char *name, int namelen);
 struct devfs_dirent *devfs_vmkdir (struct devfs_mount *, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode);
 struct devfs_dirent *devfs_find (struct devfs_dirent *dd, const char *name, int namelen);
Index: devfs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/devfs/devfs_vnops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/devfs/devfs_vnops.c -L sys/fs/devfs/devfs_vnops.c -u -r1.1.1.1 -r1.2
--- sys/fs/devfs/devfs_vnops.c
+++ sys/fs/devfs/devfs_vnops.c
@@ -31,7 +31,7 @@
  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
  *
- * $FreeBSD: src/sys/fs/devfs/devfs_vnops.c,v 1.114.2.7 2005/11/12 21:21:27 dwhite Exp $
+ * $FreeBSD: src/sys/fs/devfs/devfs_vnops.c,v 1.149.4.1 2008/01/12 00:13:33 jhb Exp $
  */
 
 /*
@@ -40,8 +40,7 @@
  *	mkdir: want it ?
  */
 
-#include <opt_devfs.h>
-#include <opt_mac.h>
+#include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -53,10 +52,10 @@
 #include <sys/filio.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
@@ -72,16 +71,25 @@
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 
+#include <security/mac/mac_framework.h>
+
+struct mtx	devfs_de_interlock;
+MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
+struct sx	clone_drain_lock;
+SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
+
 static int
 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
 {
 
-	*devp = fp->f_vnode->v_rdev;
-	if (*devp != fp->f_data)
+	*dswp = devvn_refthread(fp->f_vnode, devp);
+	if (*devp != fp->f_data) {
+		if (*dswp != NULL)
+			dev_relthread(*devp);
 		return (ENXIO);
+	}
 	KASSERT((*devp)->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
-	*dswp = dev_refthread(*devp);
 	if (*dswp == NULL)
 		return (ENXIO);
 	return (0);
@@ -123,31 +131,98 @@
 	return (buf + i);
 }
 
+static int
+devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
+	struct devfs_dirent *de)
+{
+	int not_found;
+
+	not_found = 0;
+	if (de->de_flags & DE_DOOMED)
+		not_found = 1;
+	if (DEVFS_DE_DROP(de)) {
+		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
+		devfs_dirent_free(de);
+	}
+	if (DEVFS_DMP_DROP(dmp)) {
+		KASSERT(not_found == 1,
+			("DEVFS mount struct freed before dirent"));
+		not_found = 2;
+		sx_xunlock(&dmp->dm_lock);
+		devfs_unmount_final(dmp);
+	}
+	if (not_found == 1 || (drop_dm_lock && not_found != 2))
+		sx_unlock(&dmp->dm_lock);
+	return (not_found);
+}
+
+static void
+devfs_insmntque_dtr(struct vnode *vp, void *arg)
+{
+	struct devfs_dirent *de;
+
+	de = (struct devfs_dirent *)arg;
+	mtx_lock(&devfs_de_interlock);
+	vp->v_data = NULL;
+	de->de_vnode = NULL;
+	mtx_unlock(&devfs_de_interlock);
+	vgone(vp);
+	vput(vp);
+}
+
+/*
+ * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
+ * it on return.
+ */
 int
 devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
 {
 	int error;
 	struct vnode *vp;
 	struct cdev *dev;
+	struct devfs_mount *dmp;
 
 	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
-loop:
+	dmp = VFSTODEVFS(mp);
+	if (de->de_flags & DE_DOOMED) {
+		sx_xunlock(&dmp->dm_lock);
+		return (ENOENT);
+	}
+ loop:
+	DEVFS_DE_HOLD(de);
+	DEVFS_DMP_HOLD(dmp);
+	mtx_lock(&devfs_de_interlock);
 	vp = de->de_vnode;
 	if (vp != NULL) {
-		if (vget(vp, LK_EXCLUSIVE, td))
+		VI_LOCK(vp);
+		mtx_unlock(&devfs_de_interlock);
+		sx_xunlock(&dmp->dm_lock);
+		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
+		sx_xlock(&dmp->dm_lock);
+		if (devfs_allocv_drop_refs(0, dmp, de)) {
+			if (error == 0)
+				vput(vp);
+			return (ENOENT);
+		}
+		else if (error)
 			goto loop;
+		sx_xunlock(&dmp->dm_lock);
 		*vpp = vp;
 		return (0);
 	}
+	mtx_unlock(&devfs_de_interlock);
 	if (de->de_dirent->d_type == DT_CHR) {
-		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE))
+		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
+			devfs_allocv_drop_refs(1, dmp, de);
 			return (ENOENT);
+		}
 		dev = &de->de_cdp->cdp_c;
 	} else {
 		dev = NULL;
 	}
 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
 	if (error != 0) {
+		devfs_allocv_drop_refs(1, dmp, de);
 		printf("devfs_allocv: failed to allocate new vnode\n");
 		return (error);
 	}
@@ -157,6 +232,7 @@
 		VI_LOCK(vp);
 		dev_lock();
 		dev_refl(dev);
+		/* XXX: v_rdev should be protected by vnode lock */
 		vp->v_rdev = dev;
 		KASSERT(vp->v_usecount == 1,
 		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
@@ -171,12 +247,24 @@
 	} else {
 		vp->v_type = VBAD;
 	}
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+	mtx_lock(&devfs_de_interlock);
 	vp->v_data = de;
 	de->de_vnode = vp;
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+	mtx_unlock(&devfs_de_interlock);
+	error = insmntque1(vp, mp, devfs_insmntque_dtr, de);
+	if (error != 0) {
+		(void) devfs_allocv_drop_refs(1, dmp, de);
+		return (error);
+	}
+	if (devfs_allocv_drop_refs(0, dmp, de)) {
+		vput(vp);
+		return (ENOENT);
+	}
 #ifdef MAC
 	mac_associate_vnode_devfs(mp, de, vp);
 #endif
+	sx_xunlock(&dmp->dm_lock);
 	*vpp = vp;
 	return (0);
 }
@@ -222,7 +310,7 @@
 	struct thread *td = ap->a_td;
 	struct cdev *dev = vp->v_rdev;
 	struct cdevsw *dsw;
-	int error;
+	int vp_locked, error;
 
 	/*
 	 * Hack: a tty device that is a controlling terminal
@@ -233,12 +321,6 @@
 	 * if the reference count is 2 (this last descriptor
 	 * plus the session), release the reference from the session.
 	 */
-
-	/*
-	 * This needs to be rewritten to take the vp interlock into
-	 * consideration.
-	 */
-
 	oldvp = NULL;
 	sx_xlock(&proctree_lock);
 	if (td && vp == td->td_proc->p_session->s_ttyvp) {
@@ -276,7 +358,10 @@
 		dev_relthread(dev);
 		return (0);
 	}
+	vholdl(vp);
 	VI_UNLOCK(vp);
+	vp_locked = VOP_ISLOCKED(vp, td);
+	VOP_UNLOCK(vp, 0, td);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
 	if (!(dsw->d_flags & D_NEEDGIANT)) {
@@ -287,6 +372,8 @@
 		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
 	}
 	dev_relthread(dev);
+	vn_lock(vp, vp_locked | LK_RETRY, td);
+	vdrop(vp);
 	return (error);
 }
 
@@ -341,7 +428,7 @@
 
 #define fix(aa)							\
 	do {							\
-		if ((aa).tv_sec == 0) {				\
+		if ((aa).tv_sec <= 3600) {			\
 			(aa).tv_sec = boottime.tv_sec;		\
 			(aa).tv_nsec = boottime.tv_usec * 1000; \
 		}						\
@@ -418,7 +505,7 @@
 			return (0);
 		}
 
-		mtx_lock(&Giant);
+		mtx_lock(&Giant);	/* XXX TTY */
 
 		vpold = td->td_proc->p_session->s_ttyvp;
 		VREF(vp);
@@ -431,7 +518,7 @@
 		/* Get rid of reference to old control tty */
 		if (vpold)
 			vrele(vpold);
-		mtx_unlock(&Giant);
+		mtx_unlock(&Giant);	/* XXX TTY */
 	}
 	return (error);
 }
@@ -453,7 +540,7 @@
 }
 
 static int
-devfs_lookupx(struct vop_lookup_args *ap)
+devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
@@ -505,11 +592,19 @@
 		de = TAILQ_NEXT(de, de_list);		/* ".." */
 		de = de->de_dir;
 		error = devfs_allocv(de, dvp->v_mount, vpp, td);
+		*dm_unlock = 0;
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
 		return (error);
 	}
 
+	DEVFS_DMP_HOLD(dmp);
 	devfs_populate(dmp);
+	if (DEVFS_DMP_DROP(dmp)) {
+		*dm_unlock = 0;
+		sx_xunlock(&dmp->dm_lock);
+		devfs_unmount_final(dmp);
+		return (ENOENT);
+	}
 	dd = dvp->v_data;
 	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen);
 	while (de == NULL) {	/* While(...) so we can use break */
@@ -526,12 +621,30 @@
 			break;
 
 		cdev = NULL;
+		DEVFS_DMP_HOLD(dmp);
+		sx_xunlock(&dmp->dm_lock);
+		sx_slock(&clone_drain_lock);
 		EVENTHANDLER_INVOKE(dev_clone,
 		    td->td_ucred, pname, strlen(pname), &cdev);
+		sx_sunlock(&clone_drain_lock);
+		sx_xlock(&dmp->dm_lock);
+		if (DEVFS_DMP_DROP(dmp)) {
+			*dm_unlock = 0;
+			sx_xunlock(&dmp->dm_lock);
+			devfs_unmount_final(dmp);
+			return (ENOENT);
+		}
 		if (cdev == NULL)
 			break;
 
+		DEVFS_DMP_HOLD(dmp);
 		devfs_populate(dmp);
+		if (DEVFS_DMP_DROP(dmp)) {
+			*dm_unlock = 0;
+			sx_xunlock(&dmp->dm_lock);
+			devfs_unmount_final(dmp);
+			return (ENOENT);
+		}
 
 		dev_lock();
 		dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx];
@@ -562,6 +675,7 @@
 		}
 	}
 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
+	*dm_unlock = 0;
 	return (error);
 }
 
@@ -570,11 +684,14 @@
 {
 	int j;
 	struct devfs_mount *dmp;
+	int dm_unlock;
 
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
+	dm_unlock = 1;
 	sx_xlock(&dmp->dm_lock);
-	j = devfs_lookupx(ap);
-	sx_xunlock(&dmp->dm_lock);
+	j = devfs_lookupx(ap, &dm_unlock);
+	if (dm_unlock == 1)
+		sx_xunlock(&dmp->dm_lock);
 	return (j);
 }
 
@@ -596,7 +713,6 @@
 		return (EOPNOTSUPP);
 	dvp = ap->a_dvp;
 	dmp = VFSTODEVFS(dvp->v_mount);
-	sx_xlock(&dmp->dm_lock);
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
@@ -604,6 +720,7 @@
 	dd = dvp->v_data;
 
 	error = ENOENT;
+	sx_xlock(&dmp->dm_lock);
 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
 			continue;
@@ -618,6 +735,7 @@
 		goto notfound;
 	de->de_flags &= ~DE_WHITEOUT;
 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
+	return (error);
 notfound:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
@@ -630,7 +748,7 @@
 	struct thread *td = ap->a_td;
 	struct vnode *vp = ap->a_vp;
 	struct cdev *dev = vp->v_rdev;
-	struct file *fp;
+	struct file *fp = ap->a_fp;
 	int error;
 	struct cdevsw *dsw;
 
@@ -644,18 +762,6 @@
 	if (dev->si_iosize_max == 0)
 		dev->si_iosize_max = DFLTPHYS;
 
-	if (vn_isdisk(vp, NULL) &&
-	    ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
-		/*
-		* When running in very secure mode, do not allow
-		* opens for writing of any disks.
-		* XXX: should be in geom_dev.c, but we lack the cred there.
-		*/
-		error = securelevel_ge(td->td_ucred, 2);
-		if (error)
-			return (error);
-	}
-
 	dsw = dev_refthread(dev);
 	if (dsw == NULL)
 		return (ENXIO);
@@ -669,13 +775,13 @@
 	if(!(dsw->d_flags & D_NEEDGIANT)) {
 		DROP_GIANT();
 		if (dsw->d_fdopen != NULL)
-			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
+			error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
 		else
 			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
 		PICKUP_GIANT();
 	} else {
 		if (dsw->d_fdopen != NULL)
-			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
+			error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
 		else
 			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
 	}
@@ -688,23 +794,18 @@
 		return (error);
 
 #if 0	/* /dev/console */
-	KASSERT(ap->a_fdidx >= 0,
-	     ("Could not vnode bypass device on fd %d", ap->a_fdidx));
+	KASSERT(fp != NULL,
+	     ("Could not vnode bypass device on NULL fp"));
 #else
-	if(ap->a_fdidx < 0)
+	if(fp == NULL)
 		return (error);
 #endif
-	/*
-	 * This is a pretty disgustingly long chain, but I am not
-	 * sure there is any better way.  Passing the fdidx into
-	 * VOP_OPEN() offers us more information than just passing
-	 * the file *.
-	 */
-	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
+	FILE_LOCK(fp);
 	KASSERT(fp->f_ops == &badfileops,
 	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
-	fp->f_ops = &devfs_ops_f;
 	fp->f_data = dev;
+	fp->f_ops = &devfs_ops_f;
+	FILE_UNLOCK(fp);
 	return (error);
 }
 
@@ -777,9 +878,9 @@
 		uio->uio_offset = fp->f_offset;
 
 	error = dsw->d_read(dev, uio, ioflag);
-	dev_relthread(dev);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
 		vfs_timestamp(&dev->si_atime);
+	dev_relthread(dev);
 
 	if ((flags & FOF_OFFSET) == 0)
 		fp->f_offset = uio->uio_offset;
@@ -823,7 +924,15 @@
 
 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
 	sx_xlock(&dmp->dm_lock);
+	DEVFS_DMP_HOLD(dmp);
 	devfs_populate(dmp);
+	if (DEVFS_DMP_DROP(dmp)) {
+		sx_xunlock(&dmp->dm_lock);
+		devfs_unmount_final(dmp);
+		if (tmp_ncookies != NULL)
+			ap->a_ncookies = tmp_ncookies;
+		return (EIO);
+	}
 	error = 0;
 	de = ap->a_vp->v_data;
 	off = 0;
@@ -876,21 +985,30 @@
 	struct devfs_dirent *de;
 	struct cdev *dev;
 
+	mtx_lock(&devfs_de_interlock);
 	de = vp->v_data;
-	if (de != NULL)
+	if (de != NULL) {
 		de->de_vnode = NULL;
-	vp->v_data = NULL;
+		vp->v_data = NULL;
+	}
+	mtx_unlock(&devfs_de_interlock);
+
 	vnode_destroy_vobject(vp);
 
+	VI_LOCK(vp);
+	dev_lock();
 	dev = vp->v_rdev;
 	vp->v_rdev = NULL;
 
-	if (dev == NULL)
+	if (dev == NULL) {
+		dev_unlock();
+		VI_UNLOCK(vp);
 		return (0);
+	}
 
-	dev_lock();
 	dev->si_usecount -= vp->v_usecount;
 	dev_unlock();
+	VI_UNLOCK(vp);
 	dev_rel(dev);
 	return (0);
 }
@@ -908,7 +1026,7 @@
 	de = vp->v_data;
 	if (de->de_cdp == NULL) {
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
-		devfs_delete(dmp, de);
+		devfs_delete(dmp, de, 1);
 	} else {
 		de->de_flags |= DE_WHITEOUT;
 	}
@@ -921,10 +1039,6 @@
  * is orphaned by setting v_op to deadfs so we need to let go of it
  * as well so that we create a new one next time around.
  *
- * XXX: locking :-(
- * XXX: We mess around with other mountpoints without holding their sxlock.
- * XXX: We hold the devlock() when we zero their vnode pointer, but is that
- * XXX: enough ?
  */
 static int
 devfs_revoke(struct vop_revoke_args *ap)
@@ -939,25 +1053,58 @@
 
 	dev = vp->v_rdev;
 	cdp = dev->si_priv;
+ 
+	dev_lock();
+	cdp->cdp_inuse++;
+	dev_unlock();
+
+	vhold(vp);
+	vgone(vp);
+	vdrop(vp);
+
+	VOP_UNLOCK(vp,0,curthread);
+ loop:
 	for (;;) {
+		mtx_lock(&devfs_de_interlock);
 		dev_lock();
 		vp2 = NULL;
 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
 			de = cdp->cdp_dirents[i];
 			if (de == NULL)
 				continue;
+
 			vp2 = de->de_vnode;
-			de->de_vnode = NULL;
-			if (vp2 != NULL)
+			if (vp2 != NULL) {
+				dev_unlock();
+				VI_LOCK(vp2);
+				mtx_unlock(&devfs_de_interlock);
+				if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK,
+				    curthread))
+					goto loop;
+				vhold(vp2);
+				vgone(vp2);
+				vdrop(vp2);
+				vput(vp2);
 				break;
+			} 
 		}
-		dev_unlock();
 		if (vp2 != NULL) {
-			vgone(vp2);
 			continue;
 		}
+		dev_unlock();
+		mtx_unlock(&devfs_de_interlock);
 		break;
 	}
+	dev_lock();
+	cdp->cdp_inuse--;
+	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
+		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
+		dev_unlock();
+		dev_rel(&cdp->cdp_c);
+	} else
+		dev_unlock();
+
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
 	return (0);
 }
 
@@ -969,7 +1116,13 @@
 
 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
 	sx_xlock(&dmp->dm_lock);
+	DEVFS_DMP_HOLD(dmp);
 	devfs_populate(dmp);
+	if (DEVFS_DMP_DROP(dmp)) {
+		sx_xunlock(&dmp->dm_lock);
+		devfs_unmount_final(dmp);
+		return (ENOENT);
+	}
 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
@@ -1022,19 +1175,23 @@
 	else
 		gid = vap->va_gid;
 	if (uid != de->de_uid || gid != de->de_gid) {
-		if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
-		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) &&
-		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0)
-			return (error);
+		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
+		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
+			error = priv_check(ap->a_td, PRIV_VFS_CHOWN);
+			if (error)
+				return (error);
+		}
 		de->de_uid = uid;
 		de->de_gid = gid;
 		c = 1;
 	}
 
 	if (vap->va_mode != (mode_t)VNOVAL) {
-		if ((ap->a_cred->cr_uid != de->de_uid) &&
-		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
-			return (error);
+		if (ap->a_cred->cr_uid != de->de_uid) {
+			error = priv_check(ap->a_td, PRIV_VFS_ADMIN);
+			if (error)
+				return (error);
+		}
 		de->de_mode = vap->va_mode;
 		c = 1;
 	}
@@ -1080,7 +1237,7 @@
 	de = vp->v_data;
 
 	mac_relabel_vnode(ap->a_cred, vp, ap->a_label);
-	mac_update_devfsdirent(vp->v_mount, de, vp);
+	mac_update_devfs(vp->v_mount, de, vp);
 
 	return (0);
 }
@@ -1104,7 +1261,8 @@
 
 	td = ap->a_cnp->cn_thread;
 	KASSERT(td == curthread, ("devfs_symlink: td != curthread"));
-	error = suser(td);
+
+	error = priv_check(td, PRIV_DEVFS_SYMLINK);
 	if (error)
 		return(error);
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
@@ -1123,9 +1281,7 @@
 	mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
 #endif
 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
-	devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td);
-	sx_xunlock(&dmp->dm_lock);
-	return (0);
+	return (devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td));
 }
 
 /* ARGSUSED */
@@ -1149,11 +1305,11 @@
 	resid = uio->uio_resid;
 
 	error = dsw->d_write(dev, uio, ioflag);
-	dev_relthread(dev);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
 		vfs_timestamp(&dev->si_ctime);
 		dev->si_mtime = dev->si_ctime;
 	}
+	dev_relthread(dev);
 
 	if ((flags & FOF_OFFSET) == 0)
 		fp->f_offset = uio->uio_offset;
Index: devfs_devs.c
===================================================================
RCS file: /home/cvs/src/sys/fs/devfs/devfs_devs.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/devfs/devfs_devs.c -L sys/fs/devfs/devfs_devs.c -u -r1.2 -r1.3
--- sys/fs/devfs/devfs_devs.c
+++ sys/fs/devfs/devfs_devs.c
@@ -25,10 +25,9 @@
  *
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36
  *
- * $FreeBSD: src/sys/fs/devfs/devfs_devs.c,v 1.36.2.4 2006/03/13 03:05:06 jeff Exp $
+ * $FreeBSD: src/sys/fs/devfs/devfs_devs.c,v 1.50 2007/04/23 13:36:52 rwatson Exp $
  */
 
-#include "opt_devfs.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
@@ -38,7 +37,6 @@
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/sx.h>
@@ -50,11 +48,13 @@
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 
+#include <security/mac/mac_framework.h>
+
 /*
  * The one true (but secret) list of active devices in the system.
  * Locked by dev_lock()/devmtx
  */
-static TAILQ_HEAD(,cdev_priv) cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list);
+struct cdev_priv_list cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list);
 
 struct unrhdr *devfs_inos;
 
@@ -180,8 +180,9 @@
 	vfs_timestamp(&de->de_ctime);
 	de->de_mtime = de->de_atime = de->de_ctime;
 	de->de_links = 1;
+	de->de_holdcnt = 1;
 #ifdef MAC
-	mac_init_devfsdirent(de);
+	mac_init_devfs(de);
 #endif
 	return (de);
 }
@@ -231,33 +232,62 @@
 }
 
 void
-devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de)
+devfs_dirent_free(struct devfs_dirent *de)
+{
+	free(de, M_DEVFS3);
+}
+
+/*
+ * The caller needs to hold the dm for the duration of the call since
+ * dm->dm_lock may be temporarily dropped.
+ */
+void
+devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de, int vp_locked)
 {
+	struct vnode *vp;
+	struct thread *td;
 
+	KASSERT((de->de_flags & DE_DOOMED) == 0,
+		("devfs_delete doomed dirent"));
+	td = curthread;
+	de->de_flags |= DE_DOOMED;
+	mtx_lock(&devfs_de_interlock);
+	vp = de->de_vnode;
+	if (vp != NULL) {
+		VI_LOCK(vp);
+		mtx_unlock(&devfs_de_interlock);
+		vholdl(vp);
+		sx_unlock(&dm->dm_lock);
+		if (!vp_locked)
+			vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
+		else
+			VI_UNLOCK(vp);
+		vgone(vp);
+		if (!vp_locked)
+			VOP_UNLOCK(vp, 0, td);
+		vdrop(vp);
+		sx_xlock(&dm->dm_lock);
+	} else
+		mtx_unlock(&devfs_de_interlock);
 	if (de->de_symlink) {
 		free(de->de_symlink, M_DEVFS);
 		de->de_symlink = NULL;
 	}
-	if (de->de_vnode != NULL) {
-		vhold(de->de_vnode);
-		de->de_vnode->v_data = NULL;
-		vgone(de->de_vnode);
-		vdrop(de->de_vnode);
-		de->de_vnode = NULL;
-	}
 #ifdef MAC
-	mac_destroy_devfsdirent(de);
+	mac_destroy_devfs(de);
 #endif
 	if (de->de_inode > DEVFS_ROOTINO) {
 		free_unr(devfs_inos, de->de_inode);
 		de->de_inode = 0;
 	}
-	free(de, M_DEVFS3);
+	if (DEVFS_DE_DROP(de))
+		devfs_dirent_free(de);
 }
 
 /*
  * Called on unmount.
- * Recursively removes the entire tree
+ * Recursively removes the entire tree.
+ * The caller needs to hold the dm for the duration of the call.
  */
 
 static void
@@ -272,13 +302,13 @@
 			break;
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
 		if (de->de_flags & (DE_DOT|DE_DOTDOT))
-			devfs_delete(dm, de);
+			devfs_delete(dm, de, 0);
 		else if (de->de_dirent->d_type == DT_DIR)
 			devfs_purge(dm, de);
 		else 
-			devfs_delete(dm, de);
+			devfs_delete(dm, de, 0);
 	}
-	devfs_delete(dm, dd);
+	devfs_delete(dm, dd, 0);
 }
 
 /*
@@ -314,6 +344,9 @@
 	dev_unlock();
 }
 
+/*
+ * The caller needs to hold the dm for the duration of the call.
+ */
 static int
 devfs_populate_loop(struct devfs_mount *dm, int cleanup)
 {
@@ -339,7 +372,6 @@
 		    cdp->cdp_dirents[dm->dm_idx] != NULL) {
 			de = cdp->cdp_dirents[dm->dm_idx];
 			cdp->cdp_dirents[dm->dm_idx] = NULL;
-			cdp->cdp_inuse--;
 			KASSERT(cdp == de->de_cdp,
 			    ("%s %d %s %p %p", __func__, __LINE__,
 			    cdp->cdp_c.si_name, cdp, de->de_cdp));
@@ -349,7 +381,10 @@
 			TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list);
 			de->de_cdp = NULL;
 			de->de_inode = 0;
-			devfs_delete(dm, de);
+			devfs_delete(dm, de, 0);
+			dev_lock();
+			cdp->cdp_inuse--;
+			dev_unlock();
 			return (1);
 		}
 		/*
@@ -437,6 +472,9 @@
 	return (0);
 }
 
+/*
+ * The caller needs to hold the dm for the duration of the call.
+ */
 void
 devfs_populate(struct devfs_mount *dm)
 {
@@ -449,6 +487,9 @@
 	dm->dm_generation = devfs_generation;
 }
 
+/*
+ * The caller needs to hold the dm for the duration of the call.
+ */
 void
 devfs_cleanup(struct devfs_mount *dm)
 {
Index: dead_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/deadfs/dead_vnops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/deadfs/dead_vnops.c -L sys/fs/deadfs/dead_vnops.c -u -r1.2 -r1.3
--- sys/fs/deadfs/dead_vnops.c
+++ sys/fs/deadfs/dead_vnops.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)dead_vnops.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/fs/deadfs/dead_vnops.c,v 1.48.2.1 2006/03/13 03:05:02 jeff Exp $
+ * $FreeBSD: src/sys/fs/deadfs/dead_vnops.c,v 1.50 2007/01/22 11:25:22 kib Exp $
  */
 
 #include <sys/param.h>
@@ -49,6 +49,7 @@
 static vop_read_t	dead_read;
 static vop_write_t	dead_write;
 static vop_getwritemount_t dead_getwritemount;
+static vop_rename_t	dead_rename;
 
 struct vop_vector dead_vnodeops = {
 	.vop_default =		&default_vnodeops,
@@ -73,7 +74,7 @@
 	.vop_readlink =		VOP_EBADF,
 	.vop_reclaim =		VOP_NULL,
 	.vop_remove =		VOP_PANIC,
-	.vop_rename =		VOP_PANIC,
+	.vop_rename =		dead_rename,
 	.vop_rmdir =		VOP_PANIC,
 	.vop_setattr =		VOP_EBADF,
 	.vop_symlink =		VOP_PANIC,
@@ -211,3 +212,25 @@
 {
 	return (POLLHUP);
 }
+
+static int
+dead_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	if (ap->a_tvp)
+		vput(ap->a_tvp);
+	if (ap->a_tdvp == ap->a_tvp)
+		vrele(ap->a_tdvp);
+	else
+		vput(ap->a_tdvp);
+	vrele(ap->a_fdvp);
+	vrele(ap->a_fvp);
+	return (EXDEV);
+}
Index: fdesc_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/fdescfs/fdesc_vnops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/fdescfs/fdesc_vnops.c -L sys/fs/fdescfs/fdesc_vnops.c -u -r1.2 -r1.3
--- sys/fs/fdescfs/fdesc_vnops.c
+++ sys/fs/fdescfs/fdesc_vnops.c
@@ -31,7 +31,7 @@
  *
  *	@(#)fdesc_vnops.c	8.9 (Berkeley) 1/21/94
  *
- * $FreeBSD: src/sys/fs/fdescfs/fdesc_vnops.c,v 1.99.2.2 2006/03/22 17:39:27 tegge Exp $
+ * $FreeBSD: src/sys/fs/fdescfs/fdesc_vnops.c,v 1.104 2007/04/04 09:11:32 rwatson Exp $
  */
 
 /*
@@ -74,7 +74,19 @@
 static vop_reclaim_t	fdesc_reclaim;
 static vop_setattr_t	fdesc_setattr;
 
-extern struct vop_vector fdesc_vnodeops;
+static struct vop_vector fdesc_vnodeops = {
+	.vop_default =		&default_vnodeops,
+
+	.vop_access =		VOP_NULL,
+	.vop_getattr =		fdesc_getattr,
+	.vop_inactive =		fdesc_inactive,
+	.vop_lookup =		fdesc_lookup,
+	.vop_open =		fdesc_open,
+	.vop_pathconf =		vop_stdpathconf,
+	.vop_readdir =		fdesc_readdir,
+	.vop_reclaim =		fdesc_reclaim,
+	.vop_setattr =		fdesc_setattr,
+};
 
 /*
  * Initialise cache headers
@@ -139,6 +151,13 @@
 	fd->fd_type = ftype;
 	fd->fd_fd = -1;
 	fd->fd_ix = ix;
+	/* XXX: vnode should be locked here */
+	error = insmntque(*vpp, mp); /* XXX: Too early for mpsafe fs */
+	if (error != 0) {
+		free(fd, M_TEMP);
+		*vpp = NULLVP;
+		goto out;
+	}
 	LIST_INSERT_HEAD(fc, fd, fd_hash);
 
 out:
@@ -438,7 +457,7 @@
 
 	fcnt = i - 2;		/* The first two nodes are `.' and `..' */
 
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
 	while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) {
 		switch (i) {
 		case 0:	/* `.' */
@@ -454,7 +473,7 @@
 			break;
 		default:
 			if (fdp->fd_ofiles[fcnt] == NULL) {
-				FILEDESC_UNLOCK_FAST(fdp);
+				FILEDESC_SUNLOCK(fdp);
 				goto done;
 			}
 
@@ -468,15 +487,15 @@
 		/*
 		 * And ship to userland
 		 */
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
 		error = uiomove(dp, UIO_MX, uio);
 		if (error)
 			goto done;
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_SLOCK(fdp);
 		i++;
 		fcnt++;
 	}
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
 
 done:
 	uio->uio_offset = i * UIO_MX;
@@ -515,17 +534,3 @@
 
 	return (0);
 }
-
-static struct vop_vector fdesc_vnodeops = {
-	.vop_default =		&default_vnodeops,
-
-	.vop_access =		VOP_NULL,
-	.vop_getattr =		fdesc_getattr,
-	.vop_inactive =		fdesc_inactive,
-	.vop_lookup =		fdesc_lookup,
-	.vop_open =		fdesc_open,
-	.vop_pathconf =		vop_stdpathconf,
-	.vop_readdir =		fdesc_readdir,
-	.vop_reclaim =		fdesc_reclaim,
-	.vop_setattr =		fdesc_setattr,
-};
Index: fdesc_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/fdescfs/fdesc_vfsops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/fdescfs/fdesc_vfsops.c -L sys/fs/fdescfs/fdesc_vfsops.c -u -r1.1.1.1 -r1.2
--- sys/fs/fdescfs/fdesc_vfsops.c
+++ sys/fs/fdescfs/fdesc_vfsops.c
@@ -31,7 +31,7 @@
  *
  *	@(#)fdesc_vfsops.c	8.4 (Berkeley) 1/21/94
  *
- * $FreeBSD: src/sys/fs/fdescfs/fdesc_vfsops.c,v 1.53 2005/03/24 07:36:13 jeff Exp $
+ * $FreeBSD: src/sys/fs/fdescfs/fdesc_vfsops.c,v 1.56 2007/04/04 09:11:32 rwatson Exp $
  */
 
 /*
@@ -52,14 +52,24 @@
 
 #include <fs/fdescfs/fdesc.h>
 
-static MALLOC_DEFINE(M_FDESCMNT, "FDESC mount", "FDESC mount structure");
+static MALLOC_DEFINE(M_FDESCMNT, "fdesc_mount", "FDESC mount structure");
 
+static vfs_cmount_t	fdesc_cmount;
 static vfs_mount_t	fdesc_mount;
 static vfs_unmount_t	fdesc_unmount;
 static vfs_statfs_t	fdesc_statfs;
 static vfs_root_t	fdesc_root;
 
 /*
+ * Compatibility shim for old mount(2) system call.
+ */
+int
+fdesc_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
+{
+	return kernel_mount(ma, flags);
+}
+
+/*
  * Mount the per-process file descriptors (/dev/fd)
  */
 static int
@@ -166,7 +176,7 @@
 	lim = lim_cur(td->td_proc, RLIMIT_NOFILE);
 	PROC_UNLOCK(td->td_proc);
 	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
 	last = min(fdp->fd_nfiles, lim);
 	freefd = 0;
 	for (i = fdp->fd_freefile; i < last; i++)
@@ -179,7 +189,7 @@
 	 */
 	if (fdp->fd_nfiles < lim)
 		freefd += (lim - fdp->fd_nfiles);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
 
 	sbp->f_flags = 0;
 	sbp->f_bsize = DEV_BSIZE;
@@ -193,6 +203,7 @@
 }
 
 static struct vfsops fdesc_vfsops = {
+	.vfs_cmount =		fdesc_cmount,
 	.vfs_init =		fdesc_init,
 	.vfs_mount =		fdesc_mount,
 	.vfs_root =		fdesc_root,
Index: fifo_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/fifofs/fifo_vnops.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -L sys/fs/fifofs/fifo_vnops.c -L sys/fs/fifofs/fifo_vnops.c -u -r1.4 -r1.5
--- sys/fs/fifofs/fifo_vnops.c
+++ sys/fs/fifofs/fifo_vnops.c
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)fifo_vnops.c	8.10 (Berkeley) 5/27/95
- * $FreeBSD: src/sys/fs/fifofs/fifo_vnops.c,v 1.113.2.19 2006/03/28 12:42:20 rwatson Exp $
+ * $FreeBSD: src/sys/fs/fifofs/fifo_vnops.c,v 1.138 2007/07/26 16:58:09 pjd Exp $
  */
 
 #include <sys/param.h>
@@ -175,12 +175,12 @@
 	struct fifoinfo *fip;
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
+	struct file *fp = ap->a_fp;
 	struct socket *rso, *wso;
-	struct file *fp;
 	int error;
 
-	ASSERT_VOP_LOCKED(vp, "fifo_open");
-	if (ap->a_fdidx < 0)
+	ASSERT_VOP_ELOCKED(vp, "fifo_open");
+	if (fp == NULL)
 		return (EINVAL);
 	if ((fip = vp->v_fifoinfo) == NULL) {
 		MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
@@ -293,11 +293,12 @@
 		}
 	}
 	mtx_unlock(&fifo_mtx);
-	KASSERT(ap->a_fdidx >= 0, ("can't fifo/vnode bypass %d", ap->a_fdidx));
-	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
+	KASSERT(fp != NULL, ("can't fifo/vnode bypass"));
+	FILE_LOCK(fp);
 	KASSERT(fp->f_ops == &badfileops, ("not badfileops in fifo_open"));
-	fp->f_ops = &fifo_ops_f;
 	fp->f_data = fip;
+	fp->f_ops = &fifo_ops_f;
+	FILE_UNLOCK(fp);
 	return (0);
 }
 
@@ -447,11 +448,10 @@
 {
 	register struct fifoinfo *fip = vp->v_fifoinfo;
 
-	if (fip == NULL) {
+	if (fip == NULL){
 		printf(", NULL v_fifoinfo");
 		return (0);
 	}
-
 	printf(", fifo with %ld readers and %ld writers",
 		fip->fi_readers, fip->fi_writers);
 	return (0);
@@ -713,7 +713,9 @@
 	if (uio->uio_resid == 0)
 		return (0);
 	sflags = (fp->f_flag & FNONBLOCK) ? MSG_NBIO : 0;
+	mtx_lock(&Giant);
 	error = soreceive(fip->fi_readsock, NULL, uio, NULL, NULL, &sflags);
+	mtx_unlock(&Giant);
 	return (error);
 }
 
@@ -733,6 +735,8 @@
 	fip = fp->f_data;
 	KASSERT(uio->uio_rw == UIO_WRITE,("fifo_write mode"));
 	sflags = (fp->f_flag & FNONBLOCK) ? MSG_NBIO : 0;
+	mtx_lock(&Giant);
 	error = sosend(fip->fi_writesock, NULL, uio, 0, NULL, sflags, td);
+	mtx_unlock(&Giant);
 	return (error);
 }
Index: direntry.h
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/direntry.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/direntry.h -L sys/fs/msdosfs/direntry.h -u -r1.2 -r1.3
--- sys/fs/msdosfs/direntry.h
+++ sys/fs/msdosfs/direntry.h
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/direntry.h,v 1.21 2005/01/06 18:10:38 imp Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/direntry.h,v 1.24 2007/08/31 22:29:55 bde Exp $ */
 /*	$NetBSD: direntry.h,v 1.14 1997/11/17 15:36:32 ws Exp $	*/
 
 /*-
@@ -134,25 +133,29 @@
 #define DD_YEAR_SHIFT		9
 
 #ifdef _KERNEL
+struct mbnambuf {
+	size_t	nb_len;
+	int	nb_last_id;
+	char	nb_buf[WIN_MAXLEN + 1];
+};
+
 struct dirent;
 struct msdosfsmount;
 
-char	*mbnambuf_flush(struct dirent *dp);
-void	mbnambuf_init(void);
-void	mbnambuf_write(char *name, int id);
-void	unix2dostime(struct timespec *tsp, u_int16_t *ddp, 
-	    u_int16_t *dtp, u_int8_t *dhp);
-void	dos2unixtime(u_int dd, u_int dt, u_int dh, struct timespec *tsp);
+char	*mbnambuf_flush(struct mbnambuf *nbp, struct dirent *dp);
+void	mbnambuf_init(struct mbnambuf *nbp);
+void	mbnambuf_write(struct mbnambuf *nbp, char *name, int id);
 int	dos2unixfn(u_char dn[11], u_char *un, int lower,
 	    struct msdosfsmount *pmp);
 int	unix2dosfn(const u_char *un, u_char dn[12], size_t unlen, u_int gen,
 	    struct msdosfsmount *pmp);
 int	unix2winfn(const u_char *un, size_t unlen, struct winentry *wep, int cnt,
 	    int chksum, struct msdosfsmount *pmp);
-int	winChkName(const u_char *un, size_t unlen, int chksum,
+int	winChkName(struct mbnambuf *nbp, const u_char *un, size_t unlen,
+	    int chksum, struct msdosfsmount *pmp);
+int	win2unixfn(struct mbnambuf *nbp, struct winentry *wep, int chksum,
 	    struct msdosfsmount *pmp);
-int	win2unixfn(struct winentry *wep, int chksum, struct msdosfsmount *pmp);
-u_int8_t winChksum(u_int8_t *name);
+u_int8_t winChksum(struct direntry *dep);
 int	winSlotCnt(const u_char *un, size_t unlen, struct msdosfsmount *pmp);
 size_t	winLenFixup(const u_char *un, size_t unlen);
 #endif	/* _KERNEL */
Index: denode.h
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/denode.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/denode.h -L sys/fs/msdosfs/denode.h -u -r1.2 -r1.3
--- sys/fs/msdosfs/denode.h
+++ sys/fs/msdosfs/denode.h
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/denode.h,v 1.33 2005/04/07 07:55:37 phk Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/denode.h,v 1.37 2007/08/07 03:22:10 bde Exp $ */
 /*	$NetBSD: denode.h,v 1.25 1997/11/17 15:36:28 ws Exp $	*/
 
 /*-
@@ -117,10 +116,12 @@
  * cache is probably pretty worthless if a file is opened by multiple
  * processes.
  */
-#define	FC_SIZE		2	/* number of entries in the cache */
+#define	FC_SIZE		3	/* number of entries in the cache */
 #define	FC_LASTMAP	0	/* entry the last call to pcbmap() resolved
 				 * to */
 #define	FC_LASTFC	1	/* entry for the last cluster in the file */
+#define	FC_NEXTTOLASTFC	2	/* entry for a cluster close to the last
+				 * cluster in the file */
 
 #define	FCE_EMPTY	0xffffffff	/* doesn't represent an actual cluster # */
 
@@ -131,6 +132,13 @@
 	(dep)->de_fc[(slot)].fc_frcn = (frcn); \
 	(dep)->de_fc[(slot)].fc_fsrcn = (fsrcn);
 
+#define	fc_last_to_nexttolast(dep) do {		 \
+	(dep)->de_fc[FC_NEXTTOLASTFC].fc_frcn =  \
+	(dep)->de_fc[FC_LASTFC].fc_frcn;	 \
+	(dep)->de_fc[FC_NEXTTOLASTFC].fc_fsrcn = \
+	(dep)->de_fc[FC_LASTFC].fc_fsrcn;	 \
+} while (0)
+
 /*
  * This is the in memory variant of a dos directory entry.  It is usually
  * contained within a vnode.
@@ -217,32 +225,32 @@
 #define	DETOV(de)	((de)->de_vnode)
 
 #define	DETIMES(dep, acc, mod, cre) do {				\
-	if ((dep)->de_flag & DE_UPDATE) { 				\
+	if ((dep)->de_flag & DE_UPDATE) {				\
 		(dep)->de_flag |= DE_MODIFIED;				\
-		unix2dostime((mod), &(dep)->de_MDate, &(dep)->de_MTime,	\
-		    NULL);						\
-		(dep)->de_Attributes |= ATTR_ARCHIVE; 			\
+		timespec2fattime((mod), 0, &(dep)->de_MDate,		\
+		    &(dep)->de_MTime, NULL);				\
+		(dep)->de_Attributes |= ATTR_ARCHIVE;			\
 	}								\
 	if ((dep)->de_pmp->pm_flags & MSDOSFSMNT_NOWIN95) {		\
 		(dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS);	\
 		break;							\
 	}								\
 	if ((dep)->de_flag & DE_ACCESS) {				\
-	    	u_int16_t adate;					\
+		u_int16_t adate;					\
 									\
-		unix2dostime((acc), &adate, NULL, NULL);		\
+		timespec2fattime((acc), 0, &adate, NULL, NULL);		\
 		if (adate != (dep)->de_ADate) {				\
 			(dep)->de_flag |= DE_MODIFIED;			\
 			(dep)->de_ADate = adate;			\
 		}							\
 	}								\
 	if ((dep)->de_flag & DE_CREATE) {				\
-		unix2dostime((cre), &(dep)->de_CDate, &(dep)->de_CTime,	\
-		    &(dep)->de_CHun);					\
-		    (dep)->de_flag |= DE_MODIFIED;			\
+		timespec2fattime((cre), 0, &(dep)->de_CDate,		\
+		    &(dep)->de_CTime, &(dep)->de_CHun);			\
+		(dep)->de_flag |= DE_MODIFIED;				\
 	}								\
 	(dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS);		\
-} while (0);
+} while (0)
 
 /*
  * This overlays the fid structure (see mount.h)
Index: msdosfsmount.h
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfsmount.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/msdosfsmount.h -L sys/fs/msdosfs/msdosfsmount.h -u -r1.2 -r1.3
--- sys/fs/msdosfs/msdosfsmount.h
+++ sys/fs/msdosfs/msdosfsmount.h
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/msdosfsmount.h,v 1.37 2005/03/15 20:57:25 phk Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/msdosfsmount.h,v 1.39 2007/07/12 16:09:07 bde Exp $ */
 /*	$NetBSD: msdosfsmount.h,v 1.17 1997/11/17 15:37:07 ws Exp $	*/
 
 /*-
@@ -95,7 +94,7 @@
 	u_long pm_fatsize;	/* size of fat in bytes */
 	u_int32_t pm_fatmask;	/* mask to use for fat numbers */
 	u_long pm_fsinfo;	/* fsinfo block number */
-	u_long pm_nxtfree;	/* next free cluster in fsinfo block */
+	u_long pm_nxtfree;	/* next place to search for a free cluster */
 	u_int pm_fatmult;	/* these 2 values are used in fat */
 	u_int pm_fatdiv;	/*	offset computation */
 	u_int pm_curfat;	/* current fat for FAT32 (0 otherwise) */
@@ -211,12 +210,6 @@
 	 ? roottobn((pmp), (dirofs)) \
 	 : cntobn((pmp), (dirclu)))
 
-/*
- * Calculate fsinfo block size
- */
-#define	fsi_size(pmp) \
-	(1024 << ((pmp)->pm_BlkPerSec >> 2))
-
 void msdosfs_fileno_init(struct mount *);
 void msdosfs_fileno_free(struct mount *);
 uint32_t msdosfs_fileno_map(struct mount *, uint64_t);
Index: msdosfs_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_vfsops.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -L sys/fs/msdosfs/msdosfs_vfsops.c -L sys/fs/msdosfs/msdosfs_vfsops.c -u -r1.4 -r1.5
--- sys/fs/msdosfs/msdosfs_vfsops.c
+++ sys/fs/msdosfs/msdosfs_vfsops.c
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_vfsops.c,v 1.144.2.3 2006/02/20 00:53:12 yar Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_vfsops.c,v 1.174 2007/08/15 17:40:09 jhb Exp $ */
 /*	$NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $	*/
 
 /*-
@@ -51,44 +50,40 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/buf.h>
 #include <sys/conf.h>
-#include <sys/namei.h>
-#include <sys/proc.h>
+#include <sys/iconv.h>
 #include <sys/kernel.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/fcntl.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
-#include <sys/stat.h> 				/* defines ALLPERMS */
-#include <sys/iconv.h>
+#include <sys/mount.h>
 #include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
 
-#include <fs/msdosfs/bpb.h>
 #include <fs/msdosfs/bootsect.h>
-#include <fs/msdosfs/msdosfsmount.h>
+#include <fs/msdosfs/bpb.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
-
-#include <geom/geom.h>
-#include <geom/geom_vfs.h>
-
-#include "opt_msdosfs.h"
+#include <fs/msdosfs/msdosfsmount.h>
 
 /* List of mount options we support */
 static const char *msdosfs_opts[] = {
 	"from",
-	"export",
+	"noatime", "export", "force", "sync",
 	"uid", "gid", "mask", "dirmask",
-	"shortname", "longname", "win95",
-	"kiconv", "cs_win", "cs_dos", "cs_local",
+	"shortname", "shortnames", "longname", "longnames", "nowin95", "win95",
+	"kiconv", "cs_win", "cs_dos", "cs_local", "large",
 	NULL
 };
 
-#define MSDOSFS_DFLTBSIZE       4096
-
 #if 1 /*def PC98*/
 /*
  * XXX - The boot signature formatted by NEC PC-98 DOS looks like a
@@ -100,10 +95,10 @@
 #define	MSDOSFS_NOCHECKSIG
 #endif
 
-MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure");
-static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table");
+MALLOC_DEFINE(M_MSDOSFSMNT, "msdosfs_mount", "MSDOSFS mount structure");
+static MALLOC_DEFINE(M_MSDOSFSFAT, "msdosfs_fat", "MSDOSFS file allocation table");
 
-struct iconv_functions *msdosfs_iconv = NULL;
+struct iconv_functions *msdosfs_iconv;
 
 static int	update_mp(struct mount *mp, struct thread *td);
 static int	mountmsdosfs(struct vnode *devvp, struct mount *mp,
@@ -114,15 +109,12 @@
 static vfs_statfs_t	msdosfs_statfs;
 static vfs_sync_t	msdosfs_sync;
 static vfs_unmount_t	msdosfs_unmount;
-static vfs_vptofh_t	msdosfs_vptofh;
 
 /* Maximum length of a character set name (arbitrary). */
 #define	MAXCSLEN	64
 
 static int
-update_mp(mp, td)
-	struct mount *mp;
-	struct thread *td;
+update_mp(struct mount *mp, struct thread *td)
 {
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 	void *dos, *win, *local;
@@ -164,16 +156,19 @@
 		pmp->pm_dirmask = v & ALLPERMS;
 	vfs_flagopt(mp->mnt_optnew, "shortname",
 	    &pmp->pm_flags, MSDOSFSMNT_SHORTNAME);
+	vfs_flagopt(mp->mnt_optnew, "shortnames",
+	    &pmp->pm_flags, MSDOSFSMNT_SHORTNAME);
 	vfs_flagopt(mp->mnt_optnew, "longname",
 	    &pmp->pm_flags, MSDOSFSMNT_LONGNAME);
+	vfs_flagopt(mp->mnt_optnew, "longnames",
+	    &pmp->pm_flags, MSDOSFSMNT_LONGNAME);
 	vfs_flagopt(mp->mnt_optnew, "kiconv",
 	    &pmp->pm_flags, MSDOSFSMNT_KICONV);
 
-	/* XXX: Can't use flagopt due to negative option */
-	if (!vfs_getopt(mp->mnt_optnew, "win95", NULL, NULL))
-		pmp->pm_flags &= ~MSDOSFSMNT_NOWIN95;
-	else
+	if (vfs_getopt(mp->mnt_optnew, "nowin95", NULL, NULL) == 0)
 		pmp->pm_flags |= MSDOSFSMNT_NOWIN95;
+	else
+		pmp->pm_flags &= ~MSDOSFSMNT_NOWIN95;
 
 	if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
 		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
@@ -190,9 +185,8 @@
 			if ((error =
 			    msdosfs_root(mp, LK_EXCLUSIVE, &rootvp, td)) != 0)
 				return error;
-			pmp->pm_flags |= findwin95(VTODE(rootvp))
-				? MSDOSFSMNT_LONGNAME
-					: MSDOSFSMNT_SHORTNAME;
+			pmp->pm_flags |= findwin95(VTODE(rootvp)) ?
+			    MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME;
 			vput(rootvp);
 		}
 	}
@@ -218,14 +212,14 @@
 	ma = mount_argf(ma, "mask", "%d", args.mask);
 	ma = mount_argf(ma, "dirmask", "%d", args.dirmask);
 
-        ma = mount_argb(ma, args.flags & MSDOSFSMNT_SHORTNAME, "noshortname");
-        ma = mount_argb(ma, args.flags & MSDOSFSMNT_LONGNAME, "nolongname");
-        ma = mount_argb(ma, !(args.flags & MSDOSFSMNT_NOWIN95), "nowin95");
-        ma = mount_argb(ma, args.flags & MSDOSFSMNT_KICONV, "nokiconv");
-
-        ma = mount_argsu(ma, "cs_win", args.cs_win, MAXCSLEN);
-        ma = mount_argsu(ma, "cs_dos", args.cs_dos, MAXCSLEN);
-        ma = mount_argsu(ma, "cs_local", args.cs_local, MAXCSLEN);
+	ma = mount_argb(ma, args.flags & MSDOSFSMNT_SHORTNAME, "noshortname");
+	ma = mount_argb(ma, args.flags & MSDOSFSMNT_LONGNAME, "nolongname");
+	ma = mount_argb(ma, !(args.flags & MSDOSFSMNT_NOWIN95), "nowin95");
+	ma = mount_argb(ma, args.flags & MSDOSFSMNT_KICONV, "nokiconv");
+
+	ma = mount_argsu(ma, "cs_win", args.cs_win, MAXCSLEN);
+	ma = mount_argsu(ma, "cs_dos", args.cs_dos, MAXCSLEN);
+	ma = mount_argsu(ma, "cs_local", args.cs_local, MAXCSLEN);
 
 	error = kernel_mount(ma, flags);
 
@@ -241,7 +235,6 @@
 msdosfs_mount(struct mount *mp, struct thread *td)
 {
 	struct vnode *devvp;	  /* vnode for blk device to mount */
-	struct export_args export;
 	/* msdosfs specific mount control block */
 	struct msdosfsmount *pmp = NULL;
 	struct nameidata ndp;
@@ -257,18 +250,19 @@
 	 * read/write; if there is no device name, that's all we do.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
-		pmp = VFSTOMSDOSFS(mp);
+		int ro_to_rw = 0;
 
-		error = vfs_copyopt(mp->mnt_optnew, "export",
-		    &export, sizeof export);
-		if (error == 0 && export.ex_flags != 0) {
+		pmp = VFSTOMSDOSFS(mp);
+		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
 			/*
-			 * Process export requests.
+			 * Forbid export requests if filesystem has
+			 * MSDOSFS_LARGEFS flag set.
 			 */
-			if ((export.ex_flags & MNT_EXPORTED) != 0 &&
-			    (pmp->pm_flags & MSDOSFS_LARGEFS) != 0)
+			if ((pmp->pm_flags & MSDOSFS_LARGEFS) != 0) {
+				vfs_mount_error(mp,
+				    "MSDOSFS_LARGEFS flag set, cannot export");
 				return (EOPNOTSUPP);
-			return (vfs_export(mp, &export));
+			}
 		}
 		if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) &&
 		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
@@ -283,26 +277,33 @@
 				return (error);
 			DROP_GIANT();
 			g_topology_lock();
-			g_access(pmp->pm_cp, 0, -1, 0);
+			error = g_access(pmp->pm_cp, 0, -1, 0);
 			g_topology_unlock();
 			PICKUP_GIANT();
+			if (error)
+				return (error);
+
+			/* Now the volume is clean. Mark it. */
+			error = markvoldirty(pmp, 0);
+			if (error && (flags & FORCECLOSE) == 0)
+				return (error);
 		} else if ((pmp->pm_flags & MSDOSFSMNT_RONLY) &&
 		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
 			 */
-			if (suser(td)) {
-				devvp = pmp->pm_devvp;
-				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
-				error = VOP_ACCESS(devvp, VREAD | VWRITE,
-						   td->td_ucred, td);
-				if (error) {
-					VOP_UNLOCK(devvp, 0, td);
-					return (error);
-				}
+			devvp = pmp->pm_devvp;
+			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
+			error = VOP_ACCESS(devvp, VREAD | VWRITE,
+			    td->td_ucred, td);
+			if (error)
+				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+			if (error) {
 				VOP_UNLOCK(devvp, 0, td);
+				return (error);
 			}
+			VOP_UNLOCK(devvp, 0, td);
 			DROP_GIANT();
 			g_topology_lock();
 			error = g_access(pmp->pm_cp, 0, 1, 0);
@@ -311,24 +312,18 @@
 			if (error)
 				return (error);
 
-			/* Now that the volume is modifiable, mark it dirty. */
-			error = markvoldirty(pmp, 1);
-			if (error)
-				return (error);
+			ro_to_rw = 1;
 		}
 		vfs_flagopt(mp->mnt_optnew, "ro",
 		    &pmp->pm_flags, MSDOSFSMNT_RONLY);
 		vfs_flagopt(mp->mnt_optnew, "ro",
 		    &mp->mnt_flag, MNT_RDONLY);
-		if (vfs_getopt(mp->mnt_optnew, "from", NULL, NULL)) {
-#ifdef	__notyet__	/* doesn't work correctly with current mountd	XXX */
-			if (args.flags & MSDOSFSMNT_MNTOPT) {
-				pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT;
-				pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT;
-				if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
-					pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
-			}
-#endif
+
+		if (ro_to_rw) {
+			/* Now that the volume is modifiable, mark it dirty. */
+			error = markvoldirty(pmp, 1);
+			if (error)
+				return (error);
 		}
 	}
 	/*
@@ -352,15 +347,15 @@
 	 * If mount by non-root, then verify that user has necessary
 	 * permissions on the device.
 	 */
-	if (suser(td)) {
-		accessmode = VREAD;
-		if ((mp->mnt_flag & MNT_RDONLY) == 0)
-			accessmode |= VWRITE;
-		error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
-		if (error) {
-			vput(devvp);
-			return (error);
-		}
+	accessmode = VREAD;
+	if ((mp->mnt_flag & MNT_RDONLY) == 0)
+		accessmode |= VWRITE;
+	error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
+	if (error)
+		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+	if (error) {
+		vput(devvp);
+		return (error);
 	}
 	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
 		error = mountmsdosfs(devvp, mp, td);
@@ -384,7 +379,7 @@
 			msdosfs_unmount(mp, MNT_FORCE, td);
 		return error;
 	}
-	
+
 	vfs_mountedfrom(mp, from);
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap);
@@ -393,10 +388,7 @@
 }
 
 static int
-mountmsdosfs(devvp, mp, td)
-	struct vnode *devvp;
-	struct mount *mp;
-	struct thread *td;
+mountmsdosfs(struct vnode *devvp, struct mount *mp, struct thread *td)
 {
 	struct msdosfsmount *pmp;
 	struct buf *bp;
@@ -407,15 +399,15 @@
 	struct byte_bpb710 *b710;
 	u_int8_t SecPerClust;
 	u_long clusters;
-	int	ronly, error;
+	int ronly, error;
 	struct g_consumer *cp;
 	struct bufobj *bo;
 
-	ronly = !vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL);
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 	/* XXX: use VOP_ACCESS to check FS perms */
 	DROP_GIANT();
 	g_topology_lock();
-	error = g_vfs_open(devvp, &cp, "msdos", ronly ? 0 : 1);
+	error = g_vfs_open(devvp, &cp, "msdosfs", ronly ? 0 : 1);
 	g_topology_unlock();
 	PICKUP_GIANT();
 	VOP_UNLOCK(devvp, 0, td);
@@ -423,16 +415,16 @@
 		return (error);
 
 	bo = &devvp->v_bufobj;
-	bp  = NULL; /* both used in error_exit */
+	bp = NULL;		/* This and pmp both used in error_exit. */
 	pmp = NULL;
 
 	/*
 	 * Read the boot sector of the filesystem, and then check the
 	 * boot signature.  If not a dos boot sector then error out.
 	 *
-	 * NOTE: 2048 is a maximum sector size in current...
+	 * NOTE: 8192 is a magic size that works for ffs.
 	 */
-	error = bread(devvp, 0, 2048, NOCRED, &bp);
+	error = bread(devvp, 0, 8192, NOCRED, &bp);
 	if (error)
 		goto error_exit;
 	bp->b_flags |= B_AGE;
@@ -455,6 +447,29 @@
 	pmp->pm_bo = bo;
 
 	/*
+	 * Initialize ownerships and permissions, since nothing else will
+	 * initialize them iff we are mounting root.
+	 */
+	pmp->pm_uid = UID_ROOT;
+	pmp->pm_gid = GID_WHEEL;
+	pmp->pm_mask = pmp->pm_dirmask = S_IXUSR | S_IXGRP | S_IXOTH |
+	    S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR;
+
+	/*
+	 * Experimental support for large MS-DOS filesystems.
+	 * WARNING: This uses at least 32 bytes of kernel memory (which is not
+	 * reclaimed until the FS is unmounted) for each file on disk to map
+	 * between the 32-bit inode numbers used by VFS and the 64-bit
+	 * pseudo-inode numbers used internally by msdosfs. This is only
+	 * safe to use in certain controlled situations (e.g. read-only FS
+	 * with less than 1 million files).
+	 * Since the mappings do not persist across unmounts (or reboots), these
+	 * filesystems are not suitable for exporting through NFS, or any other
+	 * application that requires fixed inode numbers.
+	 */
+	vfs_flagopt(mp->mnt_optnew, "large", &pmp->pm_flags, MSDOSFS_LARGEFS);
+
+	/*
 	 * Compute several useful quantities from the bpb in the
 	 * bootsector.  Copy in the dos 5 variant of the bpb then fix up
 	 * the fields that are different between dos 5 and dos 3.3.
@@ -496,19 +511,20 @@
 		pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs);
 		pmp->pm_HugeSectors = pmp->pm_Sectors;
 	}
-#ifndef MSDOSFS_LARGE
-	if (pmp->pm_HugeSectors > 0xffffffff / 
-	    (pmp->pm_BytesPerSec / sizeof(struct direntry)) + 1) {
-		/*
-		 * We cannot deal currently with this size of disk
-		 * due to fileid limitations (see msdosfs_getattr and
-		 * msdosfs_readdir)
-		 */
-		error = EINVAL;
-		printf("mountmsdosfs(): disk too big, sorry\n");
-		goto error_exit;
+	if (!(pmp->pm_flags & MSDOSFS_LARGEFS)) {
+		if (pmp->pm_HugeSectors > 0xffffffff /
+		    (pmp->pm_BytesPerSec / sizeof(struct direntry)) + 1) {
+			/*
+			 * We cannot deal currently with this size of disk
+			 * due to fileid limitations (see msdosfs_getattr and
+			 * msdosfs_readdir)
+			 */
+			error = EINVAL;
+			vfs_mount_error(mp,
+			    "Disk too big, try '-o large' mount option");
+			goto error_exit;
+		}
 	}
-#endif	/* !MSDOSFS_LARGE */
 
 	if (pmp->pm_RootDirEnts == 0) {
 		if (pmp->pm_Sectors
@@ -548,7 +564,7 @@
 	}
 
 	pmp->pm_HugeSectors *= pmp->pm_BlkPerSec;
-	pmp->pm_HiddenSects *= pmp->pm_BlkPerSec; /* XXX not used? */
+	pmp->pm_HiddenSects *= pmp->pm_BlkPerSec;	/* XXX not used? */
 	pmp->pm_FATsecs     *= pmp->pm_BlkPerSec;
 	SecPerClust         *= pmp->pm_BlkPerSec;
 
@@ -570,7 +586,7 @@
 
 	pmp->pm_maxcluster = (pmp->pm_HugeSectors - pmp->pm_firstcluster) /
 	    SecPerClust + 1;
-	pmp->pm_fatsize = pmp->pm_FATsecs * DEV_BSIZE; /* XXX not used? */
+	pmp->pm_fatsize = pmp->pm_FATsecs * DEV_BSIZE;	/* XXX not used? */
 
 	if (pmp->pm_fatmask == 0) {
 		if (pmp->pm_maxcluster
@@ -597,12 +613,12 @@
 		pmp->pm_maxcluster = clusters - 1;
 	}
 
-
 	if (FAT12(pmp))
-		pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec;
+		pmp->pm_fatblocksize = 3 * 512;
 	else
-		pmp->pm_fatblocksize = MSDOSFS_DFLTBSIZE;
-
+		pmp->pm_fatblocksize = PAGE_SIZE;
+	pmp->pm_fatblocksize = roundup(pmp->pm_fatblocksize,
+	    pmp->pm_BytesPerSec);
 	pmp->pm_fatblocksec = pmp->pm_fatblocksize / DEV_BSIZE;
 	pmp->pm_bnshift = ffs(DEV_BSIZE) - 1;
 
@@ -630,21 +646,23 @@
 	bp = NULL;
 
 	/*
-	 * Check FSInfo.
+	 * Check the fsinfo sector if we have one.  Silently fix up our
+	 * in-core copy of fp->fsinxtfree if it is unknown (0xffffffff)
+	 * or too large.  Ignore fp->fsinfree for now, since we need to
+	 * read the entire FAT anyway to fill the inuse map.
 	 */
 	if (pmp->pm_fsinfo) {
 		struct fsinfo *fp;
 
-		if ((error = bread(devvp, pmp->pm_fsinfo, fsi_size(pmp),
+		if ((error = bread(devvp, pmp->pm_fsinfo, pmp->pm_BytesPerSec,
 		    NOCRED, &bp)) != 0)
 			goto error_exit;
 		fp = (struct fsinfo *)bp->b_data;
 		if (!bcmp(fp->fsisig1, "RRaA", 4)
 		    && !bcmp(fp->fsisig2, "rrAa", 4)
-		    && !bcmp(fp->fsisig3, "\0\0\125\252", 4)
-		    && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) {
+		    && !bcmp(fp->fsisig3, "\0\0\125\252", 4)) {
 			pmp->pm_nxtfree = getulong(fp->fsinxtfree);
-			if (pmp->pm_nxtfree == 0xffffffff)
+			if (pmp->pm_nxtfree > pmp->pm_maxcluster)
 				pmp->pm_nxtfree = CLUST_FIRST;
 		} else
 			pmp->pm_fsinfo = 0;
@@ -653,15 +671,14 @@
 	}
 
 	/*
-	 * Check and validate (or perhaps invalidate?) the fsinfo structure?
+	 * Finish initializing pmp->pm_nxtfree (just in case the first few
+	 * sectors aren't properly reserved in the FAT).  This completes
+	 * the fixup for fp->fsinxtfree, and fixes up the zero-initialized
+	 * value if there is no fsinfo.  We will use pmp->pm_nxtfree
+	 * internally even if there is no fsinfo.
 	 */
-	if (pmp->pm_fsinfo && pmp->pm_nxtfree > pmp->pm_maxcluster) {
-		printf(
-		"Next free cluster in FSInfo (%lu) exceeds maxcluster (%lu)\n",
-		    pmp->pm_nxtfree, pmp->pm_maxcluster);
-		error = EINVAL;
-		goto error_exit;
-	}
+	if (pmp->pm_nxtfree < CLUST_FIRST)
+		pmp->pm_nxtfree = CLUST_FIRST;
 
 	/*
 	 * Allocate memory for the bitmap of allocated clusters, and then
@@ -705,11 +722,12 @@
 	mp->mnt_data = (qaddr_t) pmp;
 	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 
-#ifdef MSDOSFS_LARGE
-	msdosfs_fileno_init(mp);
-#endif
+	if (pmp->pm_flags & MSDOSFS_LARGEFS)
+		msdosfs_fileno_init(mp);
 
 	return 0;
 
@@ -736,10 +754,7 @@
  * Unmount the filesystem described by mp.
  */
 static int
-msdosfs_unmount(mp, mntflags, td)
-	struct mount *mp;
-	int mntflags;
-	struct thread *td;
+msdosfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 {
 	struct msdosfsmount *pmp;
 	int error, flags;
@@ -792,21 +807,19 @@
 	PICKUP_GIANT();
 	vrele(pmp->pm_devvp);
 	free(pmp->pm_inusemap, M_MSDOSFSFAT);
-#ifdef MSDOSFS_LARGE
-	msdosfs_fileno_free(mp);
-#endif
+	if (pmp->pm_flags & MSDOSFS_LARGEFS) {
+		msdosfs_fileno_free(mp);
+	}
 	free(pmp, M_MSDOSFSMNT);
 	mp->mnt_data = (qaddr_t)0;
+	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 	return (error);
 }
 
 static int
-msdosfs_root(mp, flags, vpp, td)
-	struct mount *mp;
-	int flags;
-	struct vnode **vpp;
-	struct thread *td;
+msdosfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
 {
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 	struct denode *ndep;
@@ -823,10 +836,7 @@
 }
 
 static int
-msdosfs_statfs(mp, sbp, td)
-	struct mount *mp;
-	struct statfs *sbp;
-	struct thread *td;
+msdosfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 {
 	struct msdosfsmount *pmp;
 
@@ -836,16 +846,13 @@
 	sbp->f_blocks = pmp->pm_maxcluster + 1;
 	sbp->f_bfree = pmp->pm_freeclustercount;
 	sbp->f_bavail = pmp->pm_freeclustercount;
-	sbp->f_files = pmp->pm_RootDirEnts;			/* XXX */
+	sbp->f_files = pmp->pm_RootDirEnts;	/* XXX */
 	sbp->f_ffree = 0;	/* what to put in here? */
 	return (0);
 }
 
 static int
-msdosfs_sync(mp, waitfor, td)
-	struct mount *mp;
-	int waitfor;
-	struct thread *td;
+msdosfs_sync(struct mount *mp, int waitfor, struct thread *td)
 {
 	struct vnode *vp, *nvp;
 	struct denode *dep;
@@ -914,10 +921,7 @@
 }
 
 static int
-msdosfs_fhtovp(mp, fhp, vpp)
-	struct mount *mp;
-	struct fid *fhp;
-	struct vnode **vpp;
+msdosfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
 {
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 	struct defid *defhp = (struct defid *) fhp;
@@ -930,24 +934,7 @@
 		return (error);
 	}
 	*vpp = DETOV(dep);
-	vnode_create_vobject_off(*vpp, dep->de_FileSize, curthread);
-	return (0);
-}
-
-static int
-msdosfs_vptofh(vp, fhp)
-	struct vnode *vp;
-	struct fid *fhp;
-{
-	struct denode *dep;
-	struct defid *defhp;
-
-	dep = VTODE(vp);
-	defhp = (struct defid *)fhp;
-	defhp->defid_len = sizeof(struct defid);
-	defhp->defid_dirclust = dep->de_dirclust;
-	defhp->defid_dirofs = dep->de_diroffset;
-	/* defhp->defid_gen = dep->de_gen; */
+	vnode_create_vobject(*vpp, dep->de_FileSize, curthread);
 	return (0);
 }
 
@@ -959,7 +946,6 @@
 	.vfs_statfs =		msdosfs_statfs,
 	.vfs_sync =		msdosfs_sync,
 	.vfs_unmount =		msdosfs_unmount,
-	.vfs_vptofh =		msdosfs_vptofh,
 };
 
 VFS_SET(msdosfs_vfsops, msdosfs, 0);
Index: msdosfs_iconv.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_iconv.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/msdosfs_iconv.c -L sys/fs/msdosfs/msdosfs_iconv.c -u -r1.2 -r1.3
--- sys/fs/msdosfs/msdosfs_iconv.c
+++ sys/fs/msdosfs/msdosfs_iconv.c
@@ -25,13 +25,12 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
-__FBSDID("$FreeBSD: src/sys/fs/msdosfs/msdosfs_iconv.c,v 1.2 2003/12/26 17:24:37 trhodes Exp $");
+__FBSDID("$FreeBSD: src/sys/fs/msdosfs/msdosfs_iconv.c,v 1.4 2007/08/07 02:25:56 bde Exp $");
 
 #include <sys/param.h>
+#include <sys/iconv.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/mount.h>
-#include <sys/iconv.h>
 
 VFS_DECLARE_ICONV(msdosfs);
Index: fat.h
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/fat.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/fat.h -L sys/fs/msdosfs/fat.h -u -r1.2 -r1.3
--- sys/fs/msdosfs/fat.h
+++ sys/fs/msdosfs/fat.h
@@ -1,4 +1,3 @@
-/* $MidnightBSD$ */
 /* $FreeBSD: src/sys/fs/msdosfs/fat.h,v 1.14 2005/01/06 18:10:38 imp Exp $ */
 /*	$NetBSD: fat.h,v 1.12 1997/11/17 15:36:36 ws Exp $	*/
 
Index: bootsect.h
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/bootsect.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/bootsect.h -L sys/fs/msdosfs/bootsect.h -u -r1.2 -r1.3
--- sys/fs/msdosfs/bootsect.h
+++ sys/fs/msdosfs/bootsect.h
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/bootsect.h,v 1.12.2.1 2005/10/04 13:13:57 peadar Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/bootsect.h,v 1.13 2005/09/29 14:09:46 peadar Exp $ */
 /*	$NetBSD: bootsect.h,v 1.9 1997/11/17 15:36:17 ws Exp $	*/
 
 /*-
Index: msdosfs_lookup.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_lookup.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/msdosfs_lookup.c -L sys/fs/msdosfs/msdosfs_lookup.c -u -r1.2 -r1.3
--- sys/fs/msdosfs/msdosfs_lookup.c
+++ sys/fs/msdosfs/msdosfs_lookup.c
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_lookup.c,v 1.46 2005/04/16 23:47:19 das Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_lookup.c,v 1.51 2007/08/31 22:29:55 bde Exp $ */
 /*	$NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $	*/
 
 /*-
@@ -51,17 +50,16 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/namei.h>
-#include <sys/bio.h>
 #include <sys/buf.h>
-#include <sys/vnode.h>
 #include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
 
 #include <fs/msdosfs/bpb.h>
-#include <fs/msdosfs/msdosfsmount.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
+#include <fs/msdosfs/msdosfsmount.h>
 
 /*
  * When we search a directory the blocks containing directory entries are
@@ -86,6 +84,7 @@
 		struct componentname *a_cnp;
 	} */ *ap;
 {
+	struct mbnambuf nb;
 	struct vnode *vdp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
@@ -187,7 +186,7 @@
 	 * by cnp->cn_nameptr.
 	 */
 	tdp = NULL;
-	mbnambuf_init();
+	mbnambuf_init(&nb);
 	/*
 	 * The outer loop ranges over the clusters that make up the
 	 * directory.  Note that the root directory is different from all
@@ -227,7 +226,7 @@
 				 * Drop memory of previous long matches
 				 */
 				chksum = -1;
-				mbnambuf_init();
+				mbnambuf_init(&nb);
 
 				if (slotcount < wincnt) {
 					slotcount++;
@@ -252,16 +251,15 @@
 					if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
 						continue;
 
-					chksum = win2unixfn((struct winentry *)dep,
-							    chksum,
-							    pmp);
+					chksum = win2unixfn(&nb,
+					    (struct winentry *)dep, chksum,
+					    pmp);
 					continue;
 				}
 
-				chksum = winChkName((const u_char *)cnp->cn_nameptr,
-						    unlen,
-						    chksum,
-						    pmp);
+				chksum = winChkName(&nb,
+				    (const u_char *)cnp->cn_nameptr, unlen,
+				    chksum, pmp);
 				if (chksum == -2) {
 					chksum = -1;
 					continue;
@@ -279,7 +277,7 @@
 				/*
 				 * Check for a checksum or name match
 				 */
-				chksum_ok = (chksum == winChksum(dep->deName));
+				chksum_ok = (chksum == winChksum(dep));
 				if (!chksum_ok
 				    && (!olddos || bcmp(dosfilename, dep->deName, 11))) {
 					chksum = -1;
@@ -620,7 +618,7 @@
 	 * Now write the Win95 long name
 	 */
 	if (ddep->de_fndcnt > 0) {
-		u_int8_t chksum = winChksum(ndep->deName);
+		u_int8_t chksum = winChksum(ndep);
 		const u_char *un = (const u_char *)cnp->cn_nameptr;
 		int unlen = cnp->cn_namelen;
 		int cnt = 1;
Index: msdosfs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -L sys/fs/msdosfs/msdosfs_vnops.c -L sys/fs/msdosfs/msdosfs_vnops.c -u -r1.7 -r1.8
--- sys/fs/msdosfs/msdosfs_vnops.c
+++ sys/fs/msdosfs/msdosfs_vnops.c
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_vnops.c,v 1.160.2.1 2006/02/20 00:53:12 yar Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_vnops.c,v 1.179 2007/08/31 22:29:55 bde Exp $ */
 /*	$NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $	*/
 
 /*-
@@ -51,35 +50,32 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/lockf.h>
-#include <sys/namei.h>
-#include <sys/resourcevar.h>	/* defines plimit structure in proc struct */
-#include <sys/kernel.h>
-#include <sys/stat.h>
 #include <sys/bio.h>
-#include <sys/conf.h>
 #include <sys/buf.h>
-#include <sys/proc.h>
+#include <sys/clock.h>
+#include <sys/dirent.h>
+#include <sys/lock.h>
+#include <sys/lockf.h>
+#include <sys/malloc.h>
 #include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
-#include <sys/malloc.h>
-#include <sys/dirent.h>
-#include <sys/signalvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
-#include <vm/vnode_pager.h>
-
-#include <machine/mutex.h>
 
 #include <fs/msdosfs/bpb.h>
-#include <fs/msdosfs/msdosfsmount.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
-
-#include "opt_msdosfs.h"
+#include <fs/msdosfs/msdosfsmount.h>
 
 #define	DOS_FILESIZE_MAX	0xffffffff
 
@@ -108,6 +104,7 @@
 static vop_strategy_t	msdosfs_strategy;
 static vop_print_t	msdosfs_print;
 static vop_pathconf_t	msdosfs_pathconf;
+static vop_vptofh_t	msdosfs_vptofh;
 
 /*
  * Some general notes:
@@ -208,6 +205,7 @@
 		struct vattr *a_vap;
 	} */ *ap;
 {
+
     return (EINVAL);
 }
 
@@ -222,7 +220,7 @@
 	} */ *ap;
 {
 	struct denode *dep = VTODE(ap->a_vp);
-	vnode_create_vobject_off(ap->a_vp, dep->de_FileSize, ap->a_td);
+	vnode_create_vobject(ap->a_vp, dep->de_FileSize, ap->a_td);
 	return 0;
 }
 
@@ -267,14 +265,12 @@
 	file_mode &= (vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
 
 	/*
-	 * Disallow write attempts on read-only filesystems;
-	 * unless the file is a socket, fifo, or a block or
-	 * character device resident on the filesystem.
+	 * Disallow writing to directories and regular files if the
+	 * filesystem is read-only.
 	 */
 	if (mode & VWRITE) {
 		switch (vp->v_type) {
 		case VDIR:
-		case VLNK:
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
@@ -323,33 +319,35 @@
 		    dirsperblk;
 		if (dep->de_dirclust == MSDOSFSROOT)
 			fileid = (uint64_t)roottobn(pmp, 0) * dirsperblk;
-		fileid += (uint64_t)dep->de_diroffset / sizeof(struct direntry);
+		fileid += (uoff_t)dep->de_diroffset / sizeof(struct direntry);
 	}
-#ifdef MSDOSFS_LARGE
-	vap->va_fileid = msdosfs_fileno_map(pmp->pm_mountp, fileid);
-#else
-	vap->va_fileid = (long)fileid;
-#endif
+
+	if (pmp->pm_flags & MSDOSFS_LARGEFS)
+		vap->va_fileid = msdosfs_fileno_map(pmp->pm_mountp, fileid);
+	else
+		vap->va_fileid = (long)fileid;
+
 	if ((dep->de_Attributes & ATTR_READONLY) == 0)
 		mode = S_IRWXU|S_IRWXG|S_IRWXO;
 	else
 		mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
 	vap->va_mode = mode & 
-		(ap->a_vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
+	    (ap->a_vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
 	vap->va_uid = pmp->pm_uid;
 	vap->va_gid = pmp->pm_gid;
 	vap->va_nlink = 1;
 	vap->va_rdev = 0;
 	vap->va_size = dep->de_FileSize;
-	dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime);
+	fattime2timespec(dep->de_MDate, dep->de_MTime, 0, 0, &vap->va_mtime);
 	vap->va_ctime = vap->va_mtime;
 	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
-		dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime);
-		dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun, &vap->va_birthtime);
+		fattime2timespec(dep->de_ADate, 0, 0, 0, &vap->va_atime);
+		fattime2timespec(dep->de_CDate, dep->de_CTime, dep->de_CHun,
+		    0, &vap->va_birthtime);
 	} else {
 		vap->va_atime = vap->va_mtime;
 		vap->va_birthtime.tv_sec = -1;
-		vap->va_birthtime.tv_nsec = 0; 
+		vap->va_birthtime.tv_nsec = 0;
 	}
 	vap->va_flags = 0;
 	if ((dep->de_Attributes & ATTR_ARCHIVE) == 0)
@@ -405,9 +403,11 @@
 	if (vap->va_flags != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
-		if (cred->cr_uid != pmp->pm_uid &&
-		    (error = suser_cred(cred, SUSER_ALLOWJAIL)))
-			return (error);
+		if (cred->cr_uid != pmp->pm_uid) {
+			error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
+			if (error)
+				return (error);
+		}
 		/*
 		 * We are very inconsistent about handling unsupported
 		 * attributes.  We ignored the access time and the
@@ -420,9 +420,10 @@
 		 * set ATTR_ARCHIVE for directories `cp -pr' from a more
 		 * sensible filesystem attempts it a lot.
 		 */
-		if (suser_cred(cred, SUSER_ALLOWJAIL)) {
-			if (vap->va_flags & SF_SETTABLE)
-				return EPERM;
+		if (vap->va_flags & SF_SETTABLE) {
+			error = priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0);
+			if (error)
+				return (error);
 		}
 		if (vap->va_flags & ~SF_ARCHIVED)
 			return EOPNOTSUPP;
@@ -445,30 +446,39 @@
 		gid = vap->va_gid;
 		if (gid == (gid_t)VNOVAL)
 			gid = pmp->pm_gid;
-		if ((cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
-		    (gid != pmp->pm_gid && !groupmember(gid, cred))) &&
-		    (error = suser_cred(cred, SUSER_ALLOWJAIL)))
-			return error;
+		if (cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
+		    (gid != pmp->pm_gid && !groupmember(gid, cred))) {
+			error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
+			if (error)
+				return (error);
+		}
 		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
 			return EINVAL;
 	}
 
 	if (vap->va_size != VNOVAL) {
-		/*
-		 * Disallow write attempts on read-only filesystems;
-		 * unless the file is a socket, fifo, or a block or
-		 * character device resident on the filesystem.
-		 */
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
-			/* NOT REACHED */
-		case VLNK:
 		case VREG:
+			/*
+			 * Truncation is only supported for regular files.
+			 * Disallow it if the filesystem is read-only.
+			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
+			/*
+			 * According to POSIX, the result is unspecified
+			 * for file types other than regular files,
+			 * directories and shared memory objects.  We
+			 * don't support any file types except regular
+			 * files and directories in this file system, so
+			 * this (default) case is unreachable and can do
+			 * anything.  Keep falling through to detrunc()
+			 * for now.
+			 */
 			break;
 		}
 		error = detrunc(dep, vap->va_size, 0, cred, ap->a_td);
@@ -478,22 +488,24 @@
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
-		if (cred->cr_uid != pmp->pm_uid &&
-		    (error = suser_cred(cred, SUSER_ALLOWJAIL)) &&
-		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
-		    (error = VOP_ACCESS(ap->a_vp, VWRITE, cred, ap->a_td))))
-			return (error);
+		if (vap->va_vaflags & VA_UTIMES_NULL) {
+			error = VOP_ACCESS(vp, VADMIN, cred, ap->a_td); 
+			if (error)
+				error = VOP_ACCESS(vp, VWRITE, cred,
+				    ap->a_td);
+		} else
+			error = VOP_ACCESS(vp, VADMIN, cred, ap->a_td);
 		if (vp->v_type != VDIR) {
 			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
 			    vap->va_atime.tv_sec != VNOVAL) {
 				dep->de_flag &= ~DE_ACCESS;
-				unix2dostime(&vap->va_atime, &dep->de_ADate,
-				    NULL, NULL);
+				timespec2fattime(&vap->va_atime, 0,
+				    &dep->de_ADate, NULL, NULL);
 			}
 			if (vap->va_mtime.tv_sec != VNOVAL) {
 				dep->de_flag &= ~DE_UPDATE;
-				unix2dostime(&vap->va_mtime, &dep->de_MDate,
-				    &dep->de_MTime, NULL);
+				timespec2fattime(&vap->va_mtime, 0,
+				    &dep->de_MDate, &dep->de_MTime, NULL);
 			}
 			dep->de_Attributes |= ATTR_ARCHIVE;
 			dep->de_flag |= DE_MODIFIED;
@@ -507,9 +519,11 @@
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
-		if (cred->cr_uid != pmp->pm_uid &&
-		    (error = suser_cred(cred, SUSER_ALLOWJAIL)))
-			return (error);
+		if (cred->cr_uid != pmp->pm_uid) {
+			error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
+			if (error)
+				return (error);
+		}
 		if (vp->v_type != VDIR) {
 			/* We ignore the read and execute bits. */
 			if (vap->va_mode & VWRITE)
@@ -549,18 +563,21 @@
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct uio *uio = ap->a_uio;
 
-	if (uio->uio_offset < 0)
-		return (EINVAL);
-
-	if ((uoff_t)uio->uio_offset > DOS_FILESIZE_MAX)
-		return (0);
 	/*
 	 * If they didn't ask for any data, then we are done.
 	 */
 	orig_resid = uio->uio_resid;
-	if (orig_resid <= 0)
+	if (orig_resid == 0)
 		return (0);
 
+	/*
+	 * The caller is supposed to ensure that
+	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
+	 * We don't need to check for large offsets as in ffs because
+	 * dep->de_FileSize <= DOS_FILESIZE_MAX < OFF_MAX, so large
+	 * offsets cannot cause overflow even in theory.
+	 */
+
 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 
 	isadir = dep->de_Attributes & ATTR_DIRECTORY;
@@ -568,6 +585,9 @@
 		if (uio->uio_offset >= dep->de_FileSize)
 			break;
 		lbn = de_cluster(pmp, uio->uio_offset);
+		rablock = lbn + 1;
+		blsize = pmp->pm_bpcluster;
+		on = uio->uio_offset & pmp->pm_crbomask;
 		/*
 		 * If we are operating on a directory file then be sure to
 		 * do i/o with the vnode for the filesystem instead of the
@@ -582,23 +602,22 @@
 			} else if (error)
 				break;
 			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
+		} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
+			error = bread(vp, lbn, blsize, NOCRED, &bp);
+		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+			error = cluster_read(vp, dep->de_FileSize, lbn, blsize,
+			    NOCRED, on + uio->uio_resid, seqcount, &bp);
+		} else if (seqcount > 1) {
+			rasize = blsize;
+			error = breadn(vp, lbn,
+			    blsize, &rablock, &rasize, 1, NOCRED, &bp);
 		} else {
-			blsize = pmp->pm_bpcluster;
-			rablock = lbn + 1;
-			if (seqcount > 1 &&
-			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
-				rasize = pmp->pm_bpcluster;
-				error = breadn(vp, lbn, blsize,
-				    &rablock, &rasize, 1, NOCRED, &bp);
-			} else {
-				error = bread(vp, lbn, blsize, NOCRED, &bp);
-			}
+			error = bread(vp, lbn, blsize, NOCRED, &bp);
 		}
 		if (error) {
 			brelse(bp);
 			break;
 		}
-		on = uio->uio_offset & pmp->pm_crbomask;
 		diff = pmp->pm_bpcluster - on;
 		n = diff > uio->uio_resid ? uio->uio_resid : diff;
 		diff = dep->de_FileSize - uio->uio_offset;
@@ -634,6 +653,7 @@
 	u_long osize;
 	int error = 0;
 	u_long count;
+	int seqcount;
 	daddr_t bn, lastcn;
 	struct buf *bp;
 	int ioflag = ap->a_ioflag;
@@ -664,13 +684,22 @@
 		panic("msdosfs_write(): bad file type");
 	}
 
-	if (uio->uio_offset < 0)
-		return (EFBIG);
-
+	/*
+	 * This is needed (unlike in ffs_write()) because we extend the
+	 * file outside of the loop but we don't want to extend the file
+	 * for writes of 0 bytes.
+	 */
 	if (uio->uio_resid == 0)
 		return (0);
 
 	/*
+	 * The caller is supposed to ensure that
+	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
+	 */
+	if ((uoff_t)uio->uio_offset + uio->uio_resid > DOS_FILESIZE_MAX)
+		return (EFBIG);
+
+	/*
 	 * If they've exceeded their filesize limit, tell them about it.
 	 */
 	if (td != NULL) {
@@ -684,9 +713,6 @@
 		PROC_UNLOCK(td->td_proc);
 	}
 
-	if ((uoff_t)uio->uio_offset + uio->uio_resid > DOS_FILESIZE_MAX)
-		return (EFBIG);
-
 	/*
 	 * If the offset we are starting the write at is beyond the end of
 	 * the file, then they've done a seek.  Unix filesystems allow
@@ -719,6 +745,7 @@
 	} else
 		lastcn = de_clcount(pmp, osize) - 1;
 
+	seqcount = ioflag >> IO_SEQSHIFT;
 	do {
 		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
 			error = ENOSPC;
@@ -744,7 +771,7 @@
 			 * then no need to read data from disk.
 			 */
 			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
-			clrbuf(bp);
+			vfs_bio_clrbuf(bp);
 			/*
 			 * Do the bmap now, since pcbmap needs buffers
 			 * for the fat table. (see msdosfs_strategy)
@@ -787,18 +814,31 @@
 			break;
 		}
 
+		/* Prepare for clustered writes in some else clauses. */
+		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
+			bp->b_flags |= B_CLUSTEROK;
+
 		/*
-		 * If they want this synchronous then write it and wait for
-		 * it.  Otherwise, if on a cluster boundary write it
-		 * asynchronously so we can move on to the next block
-		 * without delay.  Otherwise do a delayed write because we
-		 * may want to write somemore into the block later.
+		 * If IO_SYNC, then each buffer is written synchronously.
+		 * Otherwise, if we have a severe page deficiency then
+		 * write the buffer asynchronously.  Otherwise, if on a
+		 * cluster boundary then write the buffer asynchronously,
+		 * combining it with contiguous clusters if permitted and
+		 * possible, since we don't expect more writes into this
+		 * buffer soon.  Otherwise, do a delayed write because we
+		 * expect more writes into this buffer soon.
 		 */
 		if (ioflag & IO_SYNC)
-			(void) bwrite(bp);
-		else if (n + croffset == pmp->pm_bpcluster)
+			(void)bwrite(bp);
+		else if (vm_page_count_severe() || buf_dirty_count_severe())
 			bawrite(bp);
-		else
+		else if (n + croffset == pmp->pm_bpcluster) {
+			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
+				cluster_write(vp, bp, dep->de_FileSize,
+				    seqcount);
+			else
+				bawrite(bp);
+		} else
 			bdwrite(bp);
 		dep->de_flag |= DE_UPDATE;
 	} while (error == 0 && uio->uio_resid > 0);
@@ -838,11 +878,8 @@
 		struct thread *a_td;
 	} */ *ap;
 {
-	/*
-	 * Flush our dirty buffers.
-	 */
-	vop_stdfsync(ap);
 
+	vop_stdfsync(ap);
 	return (deupdat(VTODE(ap->a_vp), ap->a_waitfor == MNT_WAIT));
 }
 
@@ -972,8 +1009,8 @@
 	/*
 	 * Check for cross-device rename.
 	 */
-	if ((fvp->v_mount != tdvp->v_mount) ||
-	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+	if (fvp->v_mount != tdvp->v_mount ||
+	    (tvp && fvp->v_mount != tvp->v_mount)) {
 		error = EXDEV;
 abortit:
 		if (tdvp == tvp)
@@ -1179,7 +1216,7 @@
 		zp->de_fndoffset = from_diroffset;
 		error = removede(zp, ip);
 		if (error) {
-			/* XXX should really panic here, fs is corrupt */
+			/* XXX should downgrade to ro here, fs is corrupt */
 			if (newparent)
 				VOP_UNLOCK(fdvp, 0, td);
 			VOP_UNLOCK(fvp, 0, td);
@@ -1189,7 +1226,7 @@
 			error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0,
 				       &ip->de_dirclust, 0);
 			if (error) {
-				/* XXX should really panic here, fs is corrupt */
+				/* XXX should downgrade to ro here, fs is corrupt */
 				if (newparent)
 					VOP_UNLOCK(fdvp, 0, td);
 				VOP_UNLOCK(fvp, 0, td);
@@ -1219,7 +1256,7 @@
 		error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
 			      NOCRED, &bp);
 		if (error) {
-			/* XXX should really panic here, fs is corrupt */
+			/* XXX should downgrade to ro here, fs is corrupt */
 			brelse(bp);
 			VOP_UNLOCK(fvp, 0, td);
 			goto bad;
@@ -1230,7 +1267,7 @@
 			putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16);
 		error = bwrite(bp);
 		if (error) {
-			/* XXX should really panic here, fs is corrupt */
+			/* XXX should downgrade to ro here, fs is corrupt */
 			VOP_UNLOCK(fvp, 0, td);
 			goto bad;
 		}
@@ -1279,7 +1316,7 @@
 msdosfs_mkdir(ap)
 	struct vop_mkdir_args /* {
 		struct vnode *a_dvp;
-		struvt vnode **a_vpp;
+		struct vnode **a_vpp;
 		struvt componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
@@ -1473,6 +1510,7 @@
 		u_long **a_cookies;
 	} */ *ap;
 {
+	struct mbnambuf nb;
 	int error = 0;
 	int diff;
 	long n;
@@ -1556,12 +1594,14 @@
 							  * dirsperblk;
 				else
 					fileno = 1;
-#ifdef MSDOSFS_LARGE
-				dirbuf.d_fileno = msdosfs_fileno_map(
-				    pmp->pm_mountp, fileno);
-#else
-				dirbuf.d_fileno = (uint32_t)fileno;
-#endif
+				if (pmp->pm_flags & MSDOSFS_LARGEFS) {
+					dirbuf.d_fileno =
+					    msdosfs_fileno_map(pmp->pm_mountp,
+					    fileno);
+				} else {
+
+					dirbuf.d_fileno = (uint32_t)fileno;
+				}
 				dirbuf.d_type = DT_DIR;
 				switch (n) {
 				case 0:
@@ -1590,7 +1630,7 @@
 		}
 	}
 
-	mbnambuf_init();
+	mbnambuf_init(&nb);
 	off = offset;
 	while (uio->uio_resid > 0) {
 		lbn = de_cluster(pmp, offset - bias);
@@ -1637,7 +1677,7 @@
 			 */
 			if (dentp->deName[0] == SLOT_DELETED) {
 				chksum = -1;
-				mbnambuf_init();
+				mbnambuf_init(&nb);
 				continue;
 			}
 
@@ -1647,8 +1687,8 @@
 			if (dentp->deAttributes == ATTR_WIN95) {
 				if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
 					continue;
-				chksum = win2unixfn((struct winentry *)dentp,
-					chksum, pmp);
+				chksum = win2unixfn(&nb,
+				    (struct winentry *)dentp, chksum, pmp);
 				continue;
 			}
 
@@ -1657,7 +1697,7 @@
 			 */
 			if (dentp->deAttributes & ATTR_VOLUME) {
 				chksum = -1;
-				mbnambuf_init();
+				mbnambuf_init(&nb);
 				continue;
 			}
 			/*
@@ -1682,25 +1722,26 @@
 					    dirsperblk;
 				dirbuf.d_type = DT_DIR;
 			} else {
-				fileno = (uint64_t)offset / sizeof(struct direntry);
+				fileno = (uoff_t)offset /
+				    sizeof(struct direntry);
 				dirbuf.d_type = DT_REG;
 			}
-#ifdef MSDOSFS_LARGE
-			dirbuf.d_fileno = msdosfs_fileno_map(pmp->pm_mountp,
-			    fileno);
-#else
-			dirbuf.d_fileno = (uint32_t)fileno;
-#endif
-			if (chksum != winChksum(dentp->deName)) {
+			if (pmp->pm_flags & MSDOSFS_LARGEFS) {
+				dirbuf.d_fileno =
+				    msdosfs_fileno_map(pmp->pm_mountp, fileno);
+			} else
+				dirbuf.d_fileno = (uint32_t)fileno;
+
+			if (chksum != winChksum(dentp)) {
 				dirbuf.d_namlen = dos2unixfn(dentp->deName,
 				    (u_char *)dirbuf.d_name,
 				    dentp->deLowerCase |
 					((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ?
 					(LCASE_BASE | LCASE_EXT) : 0),
 				    pmp);
-				mbnambuf_init();
+				mbnambuf_init(&nb);
 			} else
-				mbnambuf_flush(&dirbuf);
+				mbnambuf_flush(&nb, &dirbuf);
 			chksum = -1;
 			dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
 			if (uio->uio_resid < dirbuf.d_reclen) {
@@ -1742,12 +1783,16 @@
 	return (error);
 }
 
-/*
- * vp  - address of vnode file the file
- * bn  - which cluster we are interested in mapping to a filesystem block number.
- * vpp - returns the vnode for the block special file holding the filesystem
- *	 containing the file of interest
- * bnp - address of where to return the filesystem relative block number
+/*-
+ * a_vp   - pointer to the file's vnode
+ * a_bn   - logical block number within the file (cluster number for us)
+ * a_bop  - where to return the bufobj of the special file containing the fs
+ * a_bnp  - where to return the "physical" block number corresponding to a_bn
+ *          (relative to the special file; units are blocks of size DEV_BSIZE)
+ * a_runp - where to return the "run past" a_bn.  This is the count of logical
+ *          blocks whose physical blocks (together with a_bn's physical block)
+ *          are contiguous.
+ * a_runb - where to return the "run before" a_bn.
  */
 static int
 msdosfs_bmap(ap)
@@ -1760,26 +1805,54 @@
 		int *a_runb;
 	} */ *ap;
 {
-	struct denode *dep = VTODE(ap->a_vp);
-	daddr_t blkno;
-	int error;
+	struct denode *dep;
+	struct mount *mp;
+	struct msdosfsmount *pmp;
+	struct vnode *vp;
+	daddr_t runbn;
+	u_long cn;
+	int bnpercn, error, maxio, maxrun, run;
 
+	vp = ap->a_vp;
+	dep = VTODE(vp);
+	pmp = dep->de_pmp;
 	if (ap->a_bop != NULL)
-		*ap->a_bop = &dep->de_pmp->pm_devvp->v_bufobj;
+		*ap->a_bop = &pmp->pm_devvp->v_bufobj;
 	if (ap->a_bnp == NULL)
 		return (0);
-	if (ap->a_runp) {
-		/*
-		 * Sequential clusters should be counted here.
-		 */
+	if (ap->a_runp != NULL)
 		*ap->a_runp = 0;
-	}
-	if (ap->a_runb) {
+	if (ap->a_runb != NULL)
 		*ap->a_runb = 0;
+	cn = ap->a_bn;
+	if (cn != ap->a_bn)
+		return (EFBIG);
+	error = pcbmap(dep, cn, ap->a_bnp, NULL, NULL);
+	if (error != 0 || (ap->a_runp == NULL && ap->a_runb == NULL))
+		return (error);
+
+	mp = vp->v_mount;
+	maxio = mp->mnt_iosize_max / mp->mnt_stat.f_iosize;
+	bnpercn = de_cn2bn(pmp, 1);
+	if (ap->a_runp != NULL) {
+		maxrun = ulmin(maxio - 1, pmp->pm_maxcluster - cn);
+		for (run = 1; run <= maxrun; run++) {
+			if (pcbmap(dep, cn + run, &runbn, NULL, NULL) != 0 ||
+			    runbn != *ap->a_bnp + run * bnpercn)
+				break;
+		}
+		*ap->a_runp = run - 1;
 	}
-	error = pcbmap(dep, ap->a_bn, &blkno, 0, 0);
-	*ap->a_bnp = blkno;
-	return (error);
+	if (ap->a_runb != NULL) {
+		maxrun = ulmin(maxio - 1, cn);
+		for (run = 1; run < maxrun; run++) {
+			if (pcbmap(dep, cn - run, &runbn, NULL, NULL) != 0 ||
+			    runbn != *ap->a_bnp - run * bnpercn)
+				break;
+		}
+		*ap->a_runb = run - 1;
+	}
+	return (0);
 }
 
 static int
@@ -1837,8 +1910,7 @@
 
 	printf("\tstartcluster %lu, dircluster %lu, diroffset %lu, ",
 	       dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset);
-	if (dep->de_dev != NULL) 
-		printf("on dev %s\n", devtoname(dep->de_dev));
+	printf("on dev %s\n", devtoname(dep->de_dev));
 	return (0);
 }
 
@@ -1889,6 +1961,25 @@
 	return (lf_advlock(ap, &dep->de_lockf, dep->de_FileSize));
 }
 
+static int
+msdosfs_vptofh(ap)
+	struct vop_vptofh_args /* {
+		struct vnode *a_vp;
+		struct fid *a_fhp;
+	} */ *ap;
+{
+	struct denode *dep;
+	struct defid *defhp;
+
+	dep = VTODE(ap->a_vp);
+	defhp = (struct defid *)ap->a_fhp;
+	defhp->defid_len = sizeof(struct defid);
+	defhp->defid_dirclust = dep->de_dirclust;
+	defhp->defid_dirofs = dep->de_diroffset;
+	/* defhp->defid_gen = dep->de_gen; */
+	return (0);
+}
+
 /* Global vfs data structures for msdosfs */
 struct vop_vector msdosfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
@@ -1919,4 +2010,5 @@
 	.vop_strategy =		msdosfs_strategy,
 	.vop_symlink =		msdosfs_symlink,
 	.vop_write =		msdosfs_write,
+	.vop_vptofh =		msdosfs_vptofh,
 };
Index: msdosfs_denode.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_denode.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/fs/msdosfs/msdosfs_denode.c -L sys/fs/msdosfs/msdosfs_denode.c -u -r1.3 -r1.4
--- sys/fs/msdosfs/msdosfs_denode.c
+++ sys/fs/msdosfs/msdosfs_denode.c
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_denode.c,v 1.88.2.2 2006/03/12 21:50:01 scottl Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_denode.c,v 1.97 2007/08/07 03:59:49 bde Exp $ */
 /*	$NetBSD: msdosfs_denode.c,v 1.28 1998/02/10 14:10:00 mrg Exp $	*/
 
 /*-
@@ -51,24 +50,23 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/clock.h>
 #include <sys/kernel.h>
-#include <sys/mount.h>
 #include <sys/malloc.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
+#include <sys/mount.h>
 #include <sys/vnode.h>
-#include <sys/mutex.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <fs/msdosfs/bpb.h>
-#include <fs/msdosfs/msdosfsmount.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
+#include <fs/msdosfs/msdosfsmount.h>
 
-static MALLOC_DEFINE(M_MSDOSFSNODE, "MSDOSFS node", "MSDOSFS vnode private part");
+static MALLOC_DEFINE(M_MSDOSFSNODE, "msdosfs_node", "MSDOSFS vnode private part");
 
 static int
 de_vncmpf(struct vnode *vp, void *arg)
@@ -107,6 +105,7 @@
 	struct denode *ldep;
 	struct vnode *nvp, *xvp;
 	struct buf *bp;
+	struct thread *td;
 
 #ifdef MSDOSFS_DEBUG
 	printf("deget(pmp %p, dirclust %lu, diroffset %lx, depp %p)\n",
@@ -137,7 +136,7 @@
 	error = vfs_hash_get(mntp, inode, LK_EXCLUSIVE, curthread, &nvp,
 	    de_vncmpf, &inode);
 	if (error)
-		return(error);
+		return (error);
 	if (nvp != NULL) {
 		*depp = VTODE(nvp);
 		KASSERT((*depp)->de_dirclust == dirclust, ("wrong dirclust"));
@@ -172,7 +171,15 @@
 	ldep->de_inode = inode;
 	fc_purge(ldep, 0);	/* init the fat cache for this denode */
 
-	error = vfs_hash_insert(nvp, inode, LK_EXCLUSIVE, curthread, &xvp,
+	td = curthread;
+	lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL, td);
+	error = insmntque(nvp, mntp);
+	if (error != 0) {
+		FREE(ldep, M_MSDOSFSNODE);
+		*depp = NULL;
+		return (error);
+	}
+	error = vfs_hash_insert(nvp, inode, LK_EXCLUSIVE, td, &xvp,
 	    de_vncmpf, &inode);
 	if (error) {
 		*depp = NULL;
@@ -211,7 +218,7 @@
 			ldep->de_FileSize = pmp->pm_rootdirsize * DEV_BSIZE;
 		}
 		/*
-		 * fill in time and date so that dos2unixtime() doesn't
+		 * fill in time and date so that fattime2timespec() doesn't
 		 * spit up when called from msdosfs_getattr() with root
 		 * denode
 		 */
@@ -256,13 +263,13 @@
 		u_long size;
 
 		/*
-		 * XXX Sometimes, these arrives that . entry have cluster
-		 * number 0, when it shouldn't.  Use real cluster number
+		 * XXX it sometimes happens that the "." entry has cluster
+		 * number 0 when it shouldn't.  Use the actual cluster number
 		 * instead of what is written in directory entry.
 		 */
-		if ((diroffset == 0) && (ldep->de_StartCluster != dirclust)) {
-			printf("deget(): . entry at clust %ld != %ld\n",
-					dirclust, ldep->de_StartCluster);
+		if (diroffset == 0 && ldep->de_StartCluster != dirclust) {
+			printf("deget(): \".\" entry at clust %lu != %lu\n",
+			    dirclust, ldep->de_StartCluster);
 			ldep->de_StartCluster = dirclust;
 		}
 
@@ -354,7 +361,6 @@
 		return (EINVAL);
 	}
 
-
 	if (dep->de_FileSize < length) {
 		vnode_pager_setsize(DETOV(dep), length);
 		return deextend(dep, length, cred);
@@ -417,14 +423,14 @@
 	 */
 	dep->de_FileSize = length;
 	if (!isadir)
-		dep->de_flag |= DE_UPDATE|DE_MODIFIED;
+		dep->de_flag |= DE_UPDATE | DE_MODIFIED;
 	allerror = vtruncbuf(DETOV(dep), cred, td, length, pmp->pm_bpcluster);
 #ifdef MSDOSFS_DEBUG
 	if (allerror)
 		printf("detrunc(): vtruncbuf error %d\n", allerror);
 #endif
 	error = deupdat(dep, 1);
-	if (error && (allerror == 0))
+	if (error != 0 && allerror == 0)
 		allerror = error;
 #ifdef MSDOSFS_DEBUG
 	printf("detrunc(): allerror %d, eofentry %lu\n",
@@ -501,7 +507,7 @@
 		}
 	}
 	dep->de_FileSize = length;
-	dep->de_flag |= DE_UPDATE|DE_MODIFIED;
+	dep->de_flag |= DE_UPDATE | DE_MODIFIED;
 	return (deupdat(dep, 1));
 }
 
@@ -529,7 +535,7 @@
 #endif
 	vp = DETOV(dep);
 	dep->de_inode = (uint64_t)dep->de_pmp->pm_bpcluster * dep->de_dirclust +
-	     dep->de_diroffset;
+	    dep->de_diroffset;
 	vfs_hash_rehash(vp, dep->de_inode);
 }
 
Index: msdosfs_fat.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_fat.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/msdosfs_fat.c -L sys/fs/msdosfs/msdosfs_fat.c -u -r1.2 -r1.3
--- sys/fs/msdosfs/msdosfs_fat.c
+++ sys/fs/msdosfs/msdosfs_fat.c
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_fat.c,v 1.37 2005/01/06 18:10:38 imp Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_fat.c,v 1.48 2007/09/23 14:49:32 bde Exp $ */
 /*	$NetBSD: msdosfs_fat.c,v 1.28 1997/11/17 15:36:49 ws Exp $	*/
 
 /*-
@@ -49,24 +48,17 @@
  * October 1992
  */
 
-/*
- * kernel include files.
- */
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/bio.h>
 #include <sys/buf.h>
-#include <sys/mount.h>		/* to define statfs structure */
-#include <sys/vnode.h>		/* to define vattr structure */
+#include <sys/mount.h>
+#include <sys/vnode.h>
 
-/*
- * msdosfs include files.
- */
 #include <fs/msdosfs/bpb.h>
-#include <fs/msdosfs/msdosfsmount.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
+#include <fs/msdosfs/msdosfsmount.h>
 
 /*
  * Fat cache stats.
@@ -80,6 +72,8 @@
 static int fc_lmdistance[LMMAX];/* counters for how far off the last
 				 * cluster mapped entry was. */
 static int fc_largedistance;	/* off by more than LMMAX		 */
+static int fc_wherefrom, fc_whereto, fc_lastclust;
+static int pm_fatblocksize;
 
 static int	chainalloc(struct msdosfsmount *pmp, u_long start,
 		    u_long count, u_long fillwith, u_long *retcluster,
@@ -120,6 +114,7 @@
 		*sizep = size;
 	if (bop)
 		*bop = ofs % pmp->pm_fatblocksize;
+	pm_fatblocksize = pmp->pm_fatblocksize;
 }
 
 /*
@@ -211,9 +206,12 @@
 	 */
 	i = 0;
 	fc_lookup(dep, findcn, &i, &cn);
-	if ((bn = findcn - i) >= LMMAX)
+	if ((bn = findcn - i) >= LMMAX) {
 		fc_largedistance++;
-	else
+		fc_wherefrom = i;
+		fc_whereto = findcn;
+		fc_lastclust = dep->de_fc[FC_LASTFC].fc_frcn;
+	} else
 		fc_lmdistance[bn]++;
 
 	/*
@@ -355,23 +353,7 @@
 	 * If we have an FSInfo block, update it.
 	 */
 	if (pmp->pm_fsinfo) {
-		u_long cn = pmp->pm_nxtfree;
-
-		if (pmp->pm_freeclustercount
-		    && (pmp->pm_inusemap[cn / N_INUSEBITS]
-			& (1 << (cn % N_INUSEBITS)))) {
-			/*
-			 * The cluster indicated in FSInfo isn't free
-			 * any longer.  Got get a new free one.
-			 */
-			for (cn = 0; cn < pmp->pm_maxcluster; cn += N_INUSEBITS)
-				if (pmp->pm_inusemap[cn / N_INUSEBITS] != (u_int)-1)
-					break;
-			pmp->pm_nxtfree = cn
-				+ ffs(pmp->pm_inusemap[cn / N_INUSEBITS]
-				      ^ (u_int)-1) - 1;
-		}
-		if (bread(pmp->pm_devvp, pmp->pm_fsinfo, fsi_size(pmp), 
+		if (bread(pmp->pm_devvp, pmp->pm_fsinfo, pmp->pm_BytesPerSec,
 		    NOCRED, &bpn) != 0) {
 			/*
 			 * Ignore the error, but turn off FSInfo update for the future.
@@ -524,7 +506,7 @@
 
 #ifdef	MSDOSFS_DEBUG
 	printf("fatentry(func %d, pmp %p, clust %lu, oldcon %p, newcon %lx)\n",
-	     function, pmp, cn, oldcontents, newcontents);
+	    function, pmp, cn, oldcontents, newcontents);
 #endif
 
 #ifdef DIAGNOSTIC
@@ -719,7 +701,7 @@
 			break;
 		map = pmp->pm_inusemap[idx];
 		if (map) {
-			len +=  ffs(map) - 1;
+			len += ffs(map) - 1;
 			break;
 		}
 		len += N_INUSEBITS;
@@ -764,6 +746,9 @@
 		*retcluster = start;
 	if (got)
 		*got = count;
+	pmp->pm_nxtfree = start + count;
+	if (pmp->pm_nxtfree > pmp->pm_maxcluster)
+		pmp->pm_nxtfree = CLUST_FIRST;
 	return (0);
 }
 
@@ -793,19 +778,15 @@
 	u_int map;
 
 #ifdef MSDOSFS_DEBUG
-	printf("clusteralloc(): find %lu clusters\n",count);
+	printf("clusteralloc(): find %lu clusters\n", count);
 #endif
 	if (start) {
 		if ((len = chainlength(pmp, start, count)) >= count)
 			return (chainalloc(pmp, start, count, fillwith, retcluster, got));
-	} else 
+	} else
 		len = 0;
 
-	/*
-	 * Start at a (pseudo) random place to maximize cluster runs
-	 * under multiple writers.
-	 */
-	newst = random() % (pmp->pm_maxcluster + 1);
+	newst = pmp->pm_nxtfree;
 	foundl = 0;
 
 	for (cn = newst; cn <= pmp->pm_maxcluster;) {
@@ -1026,6 +1007,7 @@
 			return (error);
 	}
 
+	fc_last_to_nexttolast(dep);
 	while (count > 0) {
 		/*
 		 * Allocate a new cluster chain and cat onto the end of the
@@ -1082,8 +1064,8 @@
 				 */
 				if (dep->de_Attributes & ATTR_DIRECTORY)
 					bp = getblk(pmp->pm_devvp,
-						    cntobn(pmp, cn++),
-						    pmp->pm_bpcluster, 0, 0, 0);
+					    cntobn(pmp, cn++),
+					    pmp->pm_bpcluster, 0, 0, 0);
 				else {
 					bp = getblk(DETOV(dep),
 					    de_cn2bn(pmp, frcn++),
@@ -1100,7 +1082,7 @@
 					else
 						bp->b_blkno = blkno;
 				}
-				clrbuf(bp);
+				vfs_bio_clrbuf(bp);
 				if (bpp) {
 					*bpp = bp;
 					bpp = NULL;
Index: bpb.h
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/bpb.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/bpb.h -L sys/fs/msdosfs/bpb.h -u -r1.2 -r1.3
--- sys/fs/msdosfs/bpb.h
+++ sys/fs/msdosfs/bpb.h
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/bpb.h,v 1.11 2005/01/06 18:10:38 imp Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/bpb.h,v 1.16 2007/07/12 16:09:07 bde Exp $ */
 /*	$NetBSD: bpb.h,v 1.7 1997/11/17 15:36:24 ws Exp $	*/
 
 /*-
@@ -87,35 +86,14 @@
  * and longs are just character arrays of the appropriate length.  This is
  * because the compiler forces shorts and longs to align on word or
  * halfword boundaries.
- *
- * XXX The little-endian code here assumes that the processor can access
- * 16-bit and 32-bit quantities on byte boundaries.  If this is not true,
- * use the macros for the big-endian case.
- */
-
-#include <machine/endian.h>
-
-#ifdef __i386__
-#define	UNLALIGNED_ACCESS
-#endif
-
-#if (BYTE_ORDER == LITTLE_ENDIAN) && defined(UNALIGNED_ACCESS)
-#define	getushort(x)	*((u_int16_t *)(x))
-#define	getulong(x)	*((u_int32_t *)(x))
-#define	putushort(p, v)	(*((u_int16_t *)(p)) = (v))
-#define	putulong(p, v)	(*((u_int32_t *)(p)) = (v))
-#else
-#define getushort(x)	(((u_int8_t *)(x))[0] + (((u_int8_t *)(x))[1] << 8))
-#define getulong(x)	(((u_int8_t *)(x))[0] + (((u_int8_t *)(x))[1] << 8) \
-			 + (((u_int8_t *)(x))[2] << 16)	\
-			 + (((u_int8_t *)(x))[3] << 24))
-#define putushort(p, v)	(((u_int8_t *)(p))[0] = (v),	\
-			 ((u_int8_t *)(p))[1] = (v) >> 8)
-#define putulong(p, v)	(((u_int8_t *)(p))[0] = (v),	\
-			 ((u_int8_t *)(p))[1] = (v) >> 8, \
-			 ((u_int8_t *)(p))[2] = (v) >> 16,\
-			 ((u_int8_t *)(p))[3] = (v) >> 24)
-#endif
+ */
+
+#include <sys/endian.h>
+
+#define	getushort(x)	le16dec(x)
+#define	getulong(x)	le32dec(x)
+#define	putushort(p, v)	le16enc(p, v)
+#define	putulong(p, v)	le32enc(p, v)
 
 /*
  * BIOS Parameter Block (BPB) for DOS 3.3
@@ -189,6 +167,4 @@
 	u_int8_t fsinxtfree[4];
 	u_int8_t fsifill2[12];
 	u_int8_t fsisig3[4];
-	u_int8_t fsifill3[508];
-	u_int8_t fsisig4[4];
 };
Index: msdosfs_fileno.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_fileno.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/msdosfs_fileno.c -L sys/fs/msdosfs/msdosfs_fileno.c -u -r1.2 -r1.3
--- sys/fs/msdosfs/msdosfs_fileno.c
+++ sys/fs/msdosfs/msdosfs_fileno.c
@@ -43,24 +43,21 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
-__FBSDID("$FreeBSD: src/sys/fs/msdosfs/msdosfs_fileno.c,v 1.1 2004/07/03 13:22:38 tjr Exp $");
+__FBSDID("$FreeBSD: src/sys/fs/msdosfs/msdosfs_fileno.c,v 1.5 2007/08/07 02:25:55 bde Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/conf.h>
 #include <sys/kernel.h>
-#include <sys/mount.h>
-#include <sys/malloc.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
 #include <sys/mutex.h>
 
 #include <fs/msdosfs/bpb.h>
-#include <fs/msdosfs/bootsect.h>
-#include <fs/msdosfs/msdosfsmount.h>
 #include <fs/msdosfs/direntry.h>
+#include <fs/msdosfs/msdosfsmount.h>
 
-static MALLOC_DEFINE(M_MSDOSFSFILENO, "MSDOSFS fileno", "MSDOSFS fileno mapping node");
+static MALLOC_DEFINE(M_MSDOSFSFILENO, "msdosfs_fileno", "MSDOSFS fileno mapping node");
 
 static struct mtx fileno_mtx;
 MTX_SYSINIT(fileno, &fileno_mtx, "MSDOSFS fileno", MTX_DEF);
Index: msdosfs_conv.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_conv.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/msdosfs/msdosfs_conv.c -L sys/fs/msdosfs/msdosfs_conv.c -u -r1.2 -r1.3
--- sys/fs/msdosfs/msdosfs_conv.c
+++ sys/fs/msdosfs/msdosfs_conv.c
@@ -1,5 +1,4 @@
-/* $MidnightBSD$ */
-/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_conv.c,v 1.44.2.1 2005/07/23 17:02:04 imura Exp $ */
+/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_conv.c,v 1.53 2007/08/31 22:29:55 bde Exp $ */
 /*	$NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $	*/
 
 /*-
@@ -49,52 +48,17 @@
  * October 1992
  */
 
-/*
- * System include files.
- */
 #include <sys/param.h>
-#include <sys/time.h>
-#include <sys/kernel.h>		/* defines tz */
 #include <sys/systm.h>
-#include <machine/clock.h>
 #include <sys/dirent.h>
 #include <sys/iconv.h>
 #include <sys/mount.h>
-#include <sys/malloc.h>
-
-extern struct iconv_functions *msdosfs_iconv;
 
-/*
- * MSDOSFS include files.
- */
 #include <fs/msdosfs/bpb.h>
-#include <fs/msdosfs/msdosfsmount.h>
 #include <fs/msdosfs/direntry.h>
+#include <fs/msdosfs/msdosfsmount.h>
 
-/*
- * Total number of days that have passed for each month in a regular year.
- */
-static u_short regyear[] = {
-	31, 59, 90, 120, 151, 181,
-	212, 243, 273, 304, 334, 365
-};
-
-/*
- * Total number of days that have passed for each month in a leap year.
- */
-static u_short leapyear[] = {
-	31, 60, 91, 121, 152, 182,
-	213, 244, 274, 305, 335, 366
-};
-
-/*
- * Variables used to remember parts of the last time conversion.  Maybe we
- * can avoid a full conversion.
- */
-static u_long  lasttime;
-static u_long  lastday;
-static u_short lastddate;
-static u_short lastdtime;
+extern struct iconv_functions *msdosfs_iconv;
 
 static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle);
 static u_int16_t dos2unixchr(const u_char **, size_t *, int, struct msdosfsmount *);
@@ -102,150 +66,6 @@
 static u_int16_t win2unixchr(u_int16_t, struct msdosfsmount *);
 static u_int16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *);
 
-static char	*nambuf_ptr;
-static size_t	nambuf_len;
-static int	nambuf_last_id;
-
-/*
- * Convert the unix version of time to dos's idea of time to be used in
- * file timestamps. The passed in unix time is assumed to be in GMT.
- */
-void
-unix2dostime(tsp, ddp, dtp, dhp)
-	struct timespec *tsp;
-	u_int16_t *ddp;
-	u_int16_t *dtp;
-	u_int8_t *dhp;
-{
-	u_long t;
-	u_long days;
-	u_long inc;
-	u_long year;
-	u_long month;
-	u_short *months;
-
-	/*
-	 * If the time from the last conversion is the same as now, then
-	 * skip the computations and use the saved result.
-	 */
-	t = tsp->tv_sec - (tz_minuteswest * 60)
-	    - (wall_cmos_clock ? adjkerntz : 0);
-	    /* - daylight savings time correction */
-	t &= ~1;
-	if (lasttime != t) {
-		lasttime = t;
-		lastdtime = (((t / 2) % 30) << DT_2SECONDS_SHIFT)
-		    + (((t / 60) % 60) << DT_MINUTES_SHIFT)
-		    + (((t / 3600) % 24) << DT_HOURS_SHIFT);
-
-		/*
-		 * If the number of days since 1970 is the same as the last
-		 * time we did the computation then skip all this leap year
-		 * and month stuff.
-		 */
-		days = t / (24 * 60 * 60);
-		if (days != lastday) {
-			lastday = days;
-			for (year = 1970;; year++) {
-				inc = year & 0x03 ? 365 : 366;
-				if (days < inc)
-					break;
-				days -= inc;
-			}
-			months = year & 0x03 ? regyear : leapyear;
-			for (month = 0; days >= months[month]; month++)
-				;
-			if (month > 0)
-				days -= months[month - 1];
-			lastddate = ((days + 1) << DD_DAY_SHIFT)
-			    + ((month + 1) << DD_MONTH_SHIFT);
-			/*
-			 * Remember dos's idea of time is relative to 1980.
-			 * unix's is relative to 1970.  If somehow we get a
-			 * time before 1980 then don't give totally crazy
-			 * results.
-			 */
-			if (year > 1980)
-				lastddate += (year - 1980) << DD_YEAR_SHIFT;
-		}
-	}
-	if (dtp)
-		*dtp = lastdtime;
-	if (dhp)
-		*dhp = (tsp->tv_sec & 1) * 100 + tsp->tv_nsec / 10000000;
-
-	*ddp = lastddate;
-}
-
-/*
- * The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that
- * interval there were 8 regular years and 2 leap years.
- */
-#define	SECONDSTO1980	(((8 * 365) + (2 * 366)) * (24 * 60 * 60))
-
-static u_short lastdosdate;
-static u_long  lastseconds;
-
-/*
- * Convert from dos' idea of time to unix'. This will probably only be
- * called from the stat(), and fstat() system calls and so probably need
- * not be too efficient.
- */
-void
-dos2unixtime(dd, dt, dh, tsp)
-	u_int dd;
-	u_int dt;
-	u_int dh;
-	struct timespec *tsp;
-{
-	u_long seconds;
-	u_long month;
-	u_long year;
-	u_long days;
-	u_short *months;
-
-	if (dd == 0) {
-		/*
-		 * Uninitialized field, return the epoch.
-		 */
-		tsp->tv_sec = 0;
-		tsp->tv_nsec = 0;
-		return;
-	}
-	seconds = (((dt & DT_2SECONDS_MASK) >> DT_2SECONDS_SHIFT) << 1)
-	    + ((dt & DT_MINUTES_MASK) >> DT_MINUTES_SHIFT) * 60
-	    + ((dt & DT_HOURS_MASK) >> DT_HOURS_SHIFT) * 3600
-	    + dh / 100;
-	/*
-	 * If the year, month, and day from the last conversion are the
-	 * same then use the saved value.
-	 */
-	if (lastdosdate != dd) {
-		lastdosdate = dd;
-		days = 0;
-		year = (dd & DD_YEAR_MASK) >> DD_YEAR_SHIFT;
-		days = year * 365;
-		days += year / 4 + 1;	/* add in leap days */
-		if ((year & 0x03) == 0)
-			days--;		/* if year is a leap year */
-		months = year & 0x03 ? regyear : leapyear;
-		month = (dd & DD_MONTH_MASK) >> DD_MONTH_SHIFT;
-		if (month < 1 || month > 12) {
-			printf("dos2unixtime(): month value out of range (%ld)\n",
-			    month);
-			month = 1;
-		}
-		if (month > 1)
-			days += months[month - 2];
-		days += ((dd & DD_DAY_MASK) >> DD_DAY_SHIFT) - 1;
-		lastseconds = (days * 24 * 60 * 60) + SECONDSTO1980;
-	}
-	tsp->tv_sec = seconds + lastseconds + (tz_minuteswest * 60)
-	     + adjkerntz;
-	     /* + daylight savings time correction */
-	tsp->tv_nsec = (dh % 100) * 10000000;
-}
-
 /*
  * 0 - character disallowed in long file name.
  * 1 - character should be replaced by '_' in DOS file name, 
@@ -437,7 +257,8 @@
 	 * Copy the name portion into the unix filename string.
 	 */
 	for (i = 8; i > 0 && *dn != ' ';) {
-		c = dos2unixchr((const u_char **)&dn, &i, lower, pmp);
+		c = dos2unixchr((const u_char **)&dn, &i, lower & LCASE_BASE,
+		    pmp);
 		if (c & 0xff00) {
 			*un++ = c >> 8;
 			thislong++;
@@ -455,7 +276,8 @@
 		*un++ = '.';
 		thislong++;
 		for (i = 3; i > 0 && *dn != ' ';) {
-			c = dos2unixchr((const u_char **)&dn, &i, lower, pmp);
+			c = dos2unixchr((const u_char **)&dn, &i,
+			    lower & LCASE_EXT, pmp);
 			if (c & 0xff00) {
 				*un++ = c >> 8;
 				thislong++;
@@ -774,7 +596,8 @@
  * Returns the checksum or -1 if no match
  */
 int
-winChkName(un, unlen, chksum, pmp)
+winChkName(nbp, un, unlen, chksum, pmp)
+	struct mbnambuf *nbp;
 	const u_char *un;
 	size_t unlen;
 	int chksum;
@@ -786,9 +609,9 @@
 	struct dirent dirbuf;
 
 	/*
-	 * We alread have winentry in mbnambuf
+	 * We already have winentry in *nbp.
 	 */
-	if (!mbnambuf_flush(&dirbuf) || !dirbuf.d_namlen)
+	if (!mbnambuf_flush(nbp, &dirbuf) || dirbuf.d_namlen == 0)
 		return -1;
 
 #ifdef MSDOSFS_DEBUG
@@ -823,7 +646,8 @@
  * Returns the checksum or -1 if impossible
  */
 int
-win2unixfn(wep, chksum, pmp)
+win2unixfn(nbp, wep, chksum, pmp)
+	struct mbnambuf *nbp;
 	struct winentry *wep;
 	int chksum;
 	struct msdosfsmount *pmp;
@@ -856,7 +680,7 @@
 		switch (code) {
 		case 0:
 			*np = '\0';
-			mbnambuf_write(name, (wep->weCnt & WIN_CNT) - 1);
+			mbnambuf_write(nbp, name, (wep->weCnt & WIN_CNT) - 1);
 			return chksum;
 		case '/':
 			*np = '\0';
@@ -875,7 +699,7 @@
 		switch (code) {
 		case 0:
 			*np = '\0';
-			mbnambuf_write(name, (wep->weCnt & WIN_CNT) - 1);
+			mbnambuf_write(nbp, name, (wep->weCnt & WIN_CNT) - 1);
 			return chksum;
 		case '/':
 			*np = '\0';
@@ -894,7 +718,7 @@
 		switch (code) {
 		case 0:
 			*np = '\0';
-			mbnambuf_write(name, (wep->weCnt & WIN_CNT) - 1);
+			mbnambuf_write(nbp, name, (wep->weCnt & WIN_CNT) - 1);
 			return chksum;
 		case '/':
 			*np = '\0';
@@ -909,7 +733,7 @@
 		cp += 2;
 	}
 	*np = '\0';
-	mbnambuf_write(name, (wep->weCnt & WIN_CNT) - 1);
+	mbnambuf_write(nbp, name, (wep->weCnt & WIN_CNT) - 1);
 	return chksum;
 }
 
@@ -917,22 +741,21 @@
  * Compute the unrolled checksum of a DOS filename for Win95 LFN use.
  */
 u_int8_t
-winChksum(name)
-	u_int8_t *name;
+winChksum(struct direntry *dep)
 {
 	u_int8_t s;
 
-	s = name[0];
-	s = ((s << 7) | (s >> 1)) + name[1];
-	s = ((s << 7) | (s >> 1)) + name[2];
-	s = ((s << 7) | (s >> 1)) + name[3];
-	s = ((s << 7) | (s >> 1)) + name[4];
-	s = ((s << 7) | (s >> 1)) + name[5];
-	s = ((s << 7) | (s >> 1)) + name[6];
-	s = ((s << 7) | (s >> 1)) + name[7];
-	s = ((s << 7) | (s >> 1)) + name[8];
-	s = ((s << 7) | (s >> 1)) + name[9];
-	s = ((s << 7) | (s >> 1)) + name[10];
+	s = dep->deName[0];
+	s = ((s << 7) | (s >> 1)) + dep->deName[1];
+	s = ((s << 7) | (s >> 1)) + dep->deName[2];
+	s = ((s << 7) | (s >> 1)) + dep->deName[3];
+	s = ((s << 7) | (s >> 1)) + dep->deName[4];
+	s = ((s << 7) | (s >> 1)) + dep->deName[5];
+	s = ((s << 7) | (s >> 1)) + dep->deName[6];
+	s = ((s << 7) | (s >> 1)) + dep->deName[7];
+	s = ((s << 7) | (s >> 1)) + dep->deExtension[0];
+	s = ((s << 7) | (s >> 1)) + dep->deExtension[1];
+	s = ((s << 7) | (s >> 1)) + dep->deExtension[2];
 
 	return (s);
 }
@@ -1211,19 +1034,15 @@
 }
 
 /*
- * Initialize the temporary concatenation buffer (once) and mark it as
- * empty on subsequent calls.
+ * Initialize the temporary concatenation buffer.
  */
 void
-mbnambuf_init(void)
+mbnambuf_init(struct mbnambuf *nbp)
 {
 
-        if (nambuf_ptr == NULL) { 
-		nambuf_ptr = malloc(MAXNAMLEN + 1, M_MSDOSFSMNT, M_WAITOK);
-		nambuf_ptr[MAXNAMLEN] = '\0';
-	}
-	nambuf_len = 0;
-	nambuf_last_id = -1;
+	nbp->nb_len = 0;
+	nbp->nb_last_id = -1;
+	nbp->nb_buf[sizeof(nbp->nb_buf) - 1] = '\0';
 }
 
 /*
@@ -1236,30 +1055,31 @@
  * WIN_CHARS bytes when they are first encountered.
  */
 void
-mbnambuf_write(char *name, int id)
+mbnambuf_write(struct mbnambuf *nbp, char *name, int id)
 {
-	size_t count;
 	char *slot;
+	size_t count, newlen;
 
-	KASSERT(nambuf_len == 0 || id == nambuf_last_id - 1,
-	    ("non-decreasing id, id %d last id %d", id, nambuf_last_id));
+	KASSERT(nbp->nb_len == 0 || id == nbp->nb_last_id - 1,
+	    ("non-decreasing id: id %d, last id %d", id, nbp->nb_last_id));
 
-	/* Store this substring in a WIN_CHAR-aligned slot. */
-	slot = nambuf_ptr + (id * WIN_CHARS);
+	/* Will store this substring in a WIN_CHARS-aligned slot. */
+	slot = &nbp->nb_buf[id * WIN_CHARS];
 	count = strlen(name);
-	if (nambuf_len + count > MAXNAMLEN) {
-		printf("msdosfs: file name %zu too long\n", nambuf_len + count);
+	newlen = nbp->nb_len + count;
+	if (newlen > WIN_MAXLEN || newlen > MAXNAMLEN) {
+		printf("msdosfs: file name length %zu too large\n", newlen);
 		return;
 	}
 
 	/* Shift suffix upwards by the amount length exceeds WIN_CHARS. */
-	if (count > WIN_CHARS && nambuf_len != 0)
-		bcopy(slot + WIN_CHARS, slot + count, nambuf_len);
+	if (count > WIN_CHARS && nbp->nb_len != 0)
+		bcopy(slot + WIN_CHARS, slot + count, nbp->nb_len);
 
 	/* Copy in the substring to its slot and update length so far. */
 	bcopy(name, slot, count);
-	nambuf_len += count;
-	nambuf_last_id = id;
+	nbp->nb_len = newlen;
+	nbp->nb_last_id = id;
 }
 
 /*
@@ -1270,17 +1090,17 @@
  * have been written via mbnambuf_write(), the result will be incorrect.
  */
 char *
-mbnambuf_flush(struct dirent *dp)
+mbnambuf_flush(struct mbnambuf *nbp, struct dirent *dp)
 {
 
-	if (nambuf_len > sizeof(dp->d_name) - 1) {
-		mbnambuf_init();
+	if (nbp->nb_len > sizeof(dp->d_name) - 1) {
+		mbnambuf_init(nbp);
 		return (NULL);
 	}
-	bcopy(nambuf_ptr, dp->d_name, nambuf_len);
-	dp->d_name[nambuf_len] = '\0';
-	dp->d_namlen = nambuf_len;
+	bcopy(&nbp->nb_buf[0], dp->d_name, nbp->nb_len);
+	dp->d_name[nbp->nb_len] = '\0';
+	dp->d_namlen = nbp->nb_len;
 
-	mbnambuf_init();
+	mbnambuf_init(nbp);
 	return (dp->d_name);
 }
Index: ntfs_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_vfsops.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/ntfs/ntfs_vfsops.c -L sys/fs/ntfs/ntfs_vfsops.c -u -r1.1.1.2 -r1.2
--- sys/fs/ntfs/ntfs_vfsops.c
+++ sys/fs/ntfs/ntfs_vfsops.c
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/ntfs/ntfs_vfsops.c,v 1.78.2.2 2006/02/20 00:53:13 yar Exp $
+ * $FreeBSD: src/sys/fs/ntfs/ntfs_vfsops.c,v 1.88 2007/09/21 23:50:15 rodrigc Exp $
  */
 
 
@@ -60,10 +60,10 @@
 #include <fs/ntfs/ntfs_ihash.h>
 #include <fs/ntfs/ntfsmount.h>
 
-static MALLOC_DEFINE(M_NTFSMNT, "NTFS mount", "NTFS mount structure");
-MALLOC_DEFINE(M_NTFSNTNODE,"NTFS ntnode",  "NTFS ntnode information");
-MALLOC_DEFINE(M_NTFSFNODE,"NTFS fnode",  "NTFS fnode information");
-MALLOC_DEFINE(M_NTFSDIR,"NTFS dir",  "NTFS dir buffer");
+static MALLOC_DEFINE(M_NTFSMNT, "ntfs_mount", "NTFS mount structure");
+MALLOC_DEFINE(M_NTFSNTNODE,"ntfs_ntnode",  "NTFS ntnode information");
+MALLOC_DEFINE(M_NTFSFNODE,"ntfs_fnode",  "NTFS fnode information");
+MALLOC_DEFINE(M_NTFSDIR,"ntfs_dir",  "NTFS dir buffer");
 
 struct sockaddr;
 
@@ -80,7 +80,6 @@
 static vfs_root_t       ntfs_root;
 static vfs_statfs_t     ntfs_statfs;
 static vfs_unmount_t    ntfs_unmount;
-static vfs_vptofh_t     ntfs_vptofh;
 
 static b_strategy_t     ntfs_bufstrategy;
 
@@ -157,7 +156,6 @@
 	struct vnode	*devvp;
 	struct nameidata ndp;
 	char *from;
-	struct export_args export;
 
 	if (vfs_filteropt(mp->mnt_optnew, ntfs_opts))
 		return (EINVAL);
@@ -171,20 +169,14 @@
 	 * read/write.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
-		error = vfs_copyopt(mp->mnt_optnew, "export",	
-		    &export, sizeof export);
-		if ((error == 0) && export.ex_flags != 0) {
-			/*
-			 * Process export requests.  Jumping to "success"
-			 * will return the vfs_export() error code.
-			 */
-			err = vfs_export(mp, &export);
+		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
+			/* Process export requests in vfs_mount.c */
 			goto success;
+		} else {
+			printf("ntfs_mount(): MNT_UPDATE not supported\n");
+			err = EINVAL;
+			goto error_1;
 		}
-
-		printf("ntfs_mount(): MNT_UPDATE not supported\n");
-		err = EINVAL;
-		goto error_1;
 	}
 
 	/*
@@ -269,12 +261,26 @@
 	int error, ronly, i, v;
 	struct vnode *vp;
 	struct g_consumer *cp;
+	struct g_provider *pp;
 	char *cs_ntfs, *cs_local;
 
 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 	DROP_GIANT();
 	g_topology_lock();
-	error = g_vfs_open(devvp, &cp, "ntfs", ronly ? 0 : 1);
+
+	/*
+	 * XXX: Do not allow more than one consumer to open a device
+	 *      associated with a particular GEOM provider.
+	 *      This disables multiple read-only mounts of a device,
+	 *      but it gets rid of panics in vget() when you try to
+	 *      mount the same device more than once.
+	 */
+	pp = g_dev_getprovider(devvp->v_rdev);
+ 	if ((pp != NULL) && ((pp->acr | pp->acw | pp->ace ) != 0)) 
+		error = EPERM;
+	else 
+		error = g_vfs_open(devvp, &cp, "ntfs", ronly ? 0 : 1);
+
 	g_topology_unlock();
 	PICKUP_GIANT();
 	VOP_UNLOCK(devvp, 0, td);
@@ -299,7 +305,7 @@
 	brelse( bp );
 	bp = NULL;
 
-	if (strncmp(ntmp->ntm_bootfile.bf_sysid, NTFS_BBID, NTFS_BBIDLEN)) {
+	if (strncmp((const char *)ntmp->ntm_bootfile.bf_sysid, NTFS_BBID, NTFS_BBIDLEN)) {
 		error = EINVAL;
 		dprintf(("ntfs_mountfs: invalid boot block\n"));
 		goto out;
@@ -334,10 +340,10 @@
 	ntmp->ntm_bo = &devvp->v_bufobj;
 
 	cs_local = vfs_getopts(mp->mnt_optnew, "cs_local", &error);
-	if (error)
+	if (error && error != ENOENT)
 		goto out;
 	cs_ntfs = vfs_getopts(mp->mnt_optnew, "cs_ntfs", &error);
-	if (error)
+	if (error && error != ENOENT)
 		goto out;
 	/* Copy in the 8-bit to Unicode conversion table */
 	/* Initialize Unicode to 8-bit table from 8toU table */
@@ -437,7 +443,9 @@
 	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
 	mp->mnt_maxsymlinklen = 0;
+	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 	return (0);
 
 out1:
@@ -514,7 +522,9 @@
 	ntfs_u28_uninit(ntmp);
 	ntfs_82u_uninit(ntmp);
 	mp->mnt_data = (qaddr_t)0;
+	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 	FREE(ntmp->ntm_ad, M_NTFSMNT);
 	FREE(ntmp, M_NTFSMNT);
 	return (error);
@@ -621,25 +631,7 @@
 	/* XXX as unlink/rmdir/mkdir/creat are not currently possible
 	 * with NTFS, we don't need to check anything else for now */
 	*vpp = nvp;
-	vnode_create_vobject_off(nvp, VTOF(nvp)->f_size, curthread);
-	return (0);
-}
-
-static int
-ntfs_vptofh(
-	struct vnode *vp,
-	struct fid *fhp)
-{
-	register struct ntnode *ntp;
-	register struct ntfid *ntfhp;
-
-	ddprintf(("ntfs_fhtovp(): %p\n", vp));
-
-	ntp = VTONT(vp);
-	ntfhp = (struct ntfid *)fhp;
-	ntfhp->ntfid_len = sizeof(struct ntfid);
-	ntfhp->ntfid_ino = ntp->i_number;
-	/* ntfhp->ntfid_gen = ntp->i_gen; */
+	vnode_create_vobject(nvp, VTOF(nvp)->f_size, curthread);
 	return (0);
 }
 
@@ -728,6 +720,13 @@
 		ntfs_ntput(ip);
 		return (error);
 	}
+	/* XXX: Too early for mpsafe fs, lacks vnode lock */
+	error = insmntque(vp, ntmp->ntm_mountp);
+	if (error) {
+		ntfs_frele(fp);
+		ntfs_ntput(ip);
+		return (error);
+	}
 	dprintf(("ntfs_vget: vnode: %p for ntnode: %d\n", vp,ino));
 
 	fp->f_vp = vp;
@@ -790,7 +789,6 @@
 	.vfs_uninit =	ntfs_uninit,
 	.vfs_unmount =	ntfs_unmount,
 	.vfs_vget =	ntfs_vget,
-	.vfs_vptofh =	ntfs_vptofh,
 };
 VFS_SET(ntfs_vfsops, ntfs, 0);
 MODULE_VERSION(ntfs, 1);
Index: ntfs.h
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/ntfs/ntfs.h -L sys/fs/ntfs/ntfs.h -u -r1.1.1.1 -r1.2
--- sys/fs/ntfs/ntfs.h
+++ sys/fs/ntfs/ntfs.h
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/ntfs/ntfs.h,v 1.19 2004/12/06 20:22:16 phk Exp $
+ * $FreeBSD: src/sys/fs/ntfs/ntfs.h,v 1.20 2005/09/11 15:57:07 rodrigc Exp $
  */
 
 /*#define NTFS_DEBUG 1*/
@@ -285,7 +285,6 @@
 #define	ntfs_bpbl	(daddr_t)((ntmp)->ntm_bps)
 
 #ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NTFSMNT);
 MALLOC_DECLARE(M_NTFSNTNODE);
 MALLOC_DECLARE(M_NTFSFNODE);
 MALLOC_DECLARE(M_NTFSDIR);
Index: ntfs_ihash.c
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_ihash.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/ntfs/ntfs_ihash.c -L sys/fs/ntfs/ntfs_ihash.c -u -r1.1.1.1 -r1.2
--- sys/fs/ntfs/ntfs_ihash.c
+++ sys/fs/ntfs/ntfs_ihash.c
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ufs_ihash.c	8.7 (Berkeley) 5/17/95
- * $FreeBSD: src/sys/fs/ntfs/ntfs_ihash.c,v 1.21 2005/01/06 18:10:39 imp Exp $
+ * $FreeBSD: src/sys/fs/ntfs/ntfs_ihash.c,v 1.22.2.1 2007/11/29 19:02:05 maxim Exp $
  */
 
 #include <sys/param.h>
@@ -45,7 +45,7 @@
 #include <fs/ntfs/ntfs_inode.h>
 #include <fs/ntfs/ntfs_ihash.h>
 
-MALLOC_DEFINE(M_NTFSNTHASH, "NTFS nthash", "NTFS ntnode hash tables");
+MALLOC_DEFINE(M_NTFSNTHASH, "ntfs_nthash", "NTFS ntnode hash tables");
 
 /*
  * Structures associated with inode cacheing.
@@ -73,6 +73,7 @@
 void
 ntfs_nthashdestroy(void)
 {
+	hashdestroy(ntfs_nthashtbl, M_NTFSNTHASH, ntfs_nthash);
 	lockdestroy(&ntfs_hashlock);
 	mtx_destroy(&ntfs_nthash_mtx);
 }
Index: ntfs_subr.c
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_subr.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/ntfs/ntfs_subr.c -L sys/fs/ntfs/ntfs_subr.c -u -r1.1.1.2 -r1.2
--- sys/fs/ntfs/ntfs_subr.c
+++ sys/fs/ntfs/ntfs_subr.c
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/ntfs/ntfs_subr.c,v 1.38.2.1 2006/01/08 20:28:48 maxim Exp $
+ * $FreeBSD: src/sys/fs/ntfs/ntfs_subr.c,v 1.42 2006/11/20 19:28:36 le Exp $
  */
 
 #include <sys/param.h>
@@ -51,10 +51,10 @@
 #include <fs/ntfs/ntfs_compr.h>
 #include <fs/ntfs/ntfs_ihash.h>
 
-MALLOC_DEFINE(M_NTFSNTVATTR, "NTFS vattr", "NTFS file attribute information");
-MALLOC_DEFINE(M_NTFSRDATA, "NTFS res data", "NTFS resident data");
-MALLOC_DEFINE(M_NTFSRUN, "NTFS vrun", "NTFS vrun storage");
-MALLOC_DEFINE(M_NTFSDECOMP, "NTFS decomp", "NTFS decompression temporary");
+MALLOC_DEFINE(M_NTFSNTVATTR, "ntfs_vattr", "NTFS file attribute information");
+MALLOC_DEFINE(M_NTFSRDATA, "ntfsd_resdata", "NTFS resident data");
+MALLOC_DEFINE(M_NTFSRUN, "ntfs_vrun", "NTFS vrun storage");
+MALLOC_DEFINE(M_NTFSDECOMP, "ntfs_decomp", "NTFS decompression temporary");
 
 static int ntfs_ntlookupattr(struct ntfsmount *, const char *, int, int *, char **);
 static int ntfs_findvattr(struct ntfsmount *, struct ntnode *, struct ntvattr **, struct ntvattr **, u_int32_t, const char *, size_t, cn_t);
@@ -889,7 +889,7 @@
 	cn_t            cn;	/* VCN in current attribute */
 	caddr_t         rdbuf;	/* Buffer to read directory's blocks  */
 	u_int32_t       blsize;
-	u_int32_t       rdsize;	/* Length of data to read from current block */
+	u_int64_t       rdsize;	/* Length of data to read from current block */
 	struct attr_indexentry *iep;
 	int             error, res, anamelen, fnamelen;
 	const char     *fname,*aname;
@@ -927,7 +927,7 @@
 			break;
 		}
 
-	dprintf(("ntfs_ntlookupfile: blksz: %d, rdsz: %d\n", blsize, rdsize));
+	dprintf(("ntfs_ntlookupfile: blksz: %d, rdsz: %jd\n", blsize, rdsize));
 
 	MALLOC(rdbuf, caddr_t, blsize, M_TEMP, M_WAITOK);
 
@@ -1175,7 +1175,7 @@
 			goto fail;
 		}
 		cpbl = ntfs_btocn(blsize + ntfs_cntob(1) - 1);
-		dprintf(("ntfs_ntreaddir: indexalloc: %d, cpbl: %d\n",
+		dprintf(("ntfs_ntreaddir: indexalloc: %jd, cpbl: %d\n",
 			 iavap->va_datalen, cpbl));
 	} else {
 		dprintf(("ntfs_ntreadidir: w/o BitMap and IndexAllocation\n"));
@@ -1388,7 +1388,7 @@
 					ntfs_btocn(off), &vap);
 		if (error)
 			return (error);
-		towrite = min(left, ntfs_cntob(vap->va_vcnend + 1) - off);
+		towrite = MIN(left, ntfs_cntob(vap->va_vcnend + 1) - off);
 		ddprintf(("ntfs_writeattr_plain: o: %d, s: %d (%d - %d)\n",
 			 (u_int32_t) off, (u_int32_t) towrite,
 			 (u_int32_t) vap->va_vcnstart,
@@ -1433,7 +1433,7 @@
 	struct uio *uio)
 {
 	int             error = 0;
-	int             off;
+	off_t           off;
 	int             cnt;
 	cn_t            ccn, ccl, cn, left, cl;
 	caddr_t         data = rdata;
@@ -1483,7 +1483,7 @@
 			 * blocks at the same disk offsets to avoid
 			 * confusing the buffer cache.
 			 */
-			tocopy = min(left, ntfs_cntob(1) - off);
+			tocopy = MIN(left, ntfs_cntob(1) - off);
 			cl = ntfs_btocl(tocopy + off);
 			KASSERT(cl == 1 && tocopy <= ntfs_cntob(1),
 			    ("single cluster limit mistake"));
@@ -1544,7 +1544,7 @@
 	struct uio *uio)
 {
 	int             error = 0;
-	int             off;
+	off_t           off;
 
 	*initp = 0;
 	if (vap->va_flag & NTFS_AF_INRUN) {
@@ -1589,7 +1589,7 @@
 					 * same disk offsets to avoid
 					 * confusing the buffer cache.
 					 */
-					tocopy = min(left,
+					tocopy = MIN(left,
 					    ntfs_cntob(1) - off);
 					cl = ntfs_btocl(tocopy + off);
 					KASSERT(cl == 1 &&
@@ -1628,7 +1628,7 @@
 					ccl -= cl;
 				}
 			} else {
-				tocopy = min(left, ntfs_cntob(ccl) - off);
+				tocopy = MIN(left, ntfs_cntob(ccl) - off);
 				ddprintf(("ntfs_readntvattr_plain: "
 					"hole: ccn: 0x%x ccl: %d, off: %d, " \
 					" len: %d, left: %d\n", 
@@ -1690,7 +1690,7 @@
 					ntfs_btocn(off), &vap);
 		if (error)
 			return (error);
-		toread = min(left, ntfs_cntob(vap->va_vcnend + 1) - off);
+		toread = MIN(left, ntfs_cntob(vap->va_vcnend + 1) - off);
 		ddprintf(("ntfs_readattr_plain: o: %d, s: %d (%d - %d)\n",
 			 (u_int32_t) off, (u_int32_t) toread,
 			 (u_int32_t) vap->va_vcnstart,
@@ -1775,7 +1775,7 @@
 			if (error)
 				break;
 
-			tocopy = min(left, ntfs_cntob(NTFS_COMPUNIT_CL) - off);
+			tocopy = MIN(left, ntfs_cntob(NTFS_COMPUNIT_CL) - off);
 
 			if (init == ntfs_cntob(NTFS_COMPUNIT_CL)) {
 				if (uio)
@@ -1815,7 +1815,7 @@
 	return (error);
 }
 
-#if UNUSED_CODE
+#if 0
 int
 ntfs_parserun(
 	      cn_t * cn,
@@ -1905,7 +1905,7 @@
 	return (0);
 }
 
-#if UNUSED_CODE
+#if 0
 int
 ntfs_runtocn(
 	     cn_t * cn,	
Index: ntfs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_vnops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/ntfs/ntfs_vnops.c -L sys/fs/ntfs/ntfs_vnops.c -u -r1.2 -r1.3
--- sys/fs/ntfs/ntfs_vnops.c
+++ sys/fs/ntfs/ntfs_vnops.c
@@ -31,7 +31,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/ntfs/ntfs_vnops.c,v 1.55.2.2 2006/03/12 21:50:01 scottl Exp $
+ * $FreeBSD: src/sys/fs/ntfs/ntfs_vnops.c,v 1.60 2007/02/15 22:08:32 pjd Exp $
  *
  */
 
@@ -80,6 +80,7 @@
 static vop_cachedlookup_t	ntfs_lookup;
 static vop_fsync_t	ntfs_fsync;
 static vop_pathconf_t	ntfs_pathconf;
+static vop_vptofh_t	ntfs_vptofh;
 
 int	ntfs_prtactive = 1;	/* 1 => print out reclaim of active vnodes */
 
@@ -138,7 +139,7 @@
 	if (uio->uio_offset > fp->f_size)
 		return (0);
 
-	resid = min(uio->uio_resid, fp->f_size - uio->uio_offset);
+	resid = MIN(uio->uio_resid, fp->f_size - uio->uio_offset);
 
 	dprintf((", resid: %d\n", resid));
 
@@ -147,7 +148,7 @@
 		cn = ntfs_btocn(uio->uio_offset);
 		off = ntfs_btocnoff(uio->uio_offset);
 
-		toread = min(off + resid, ntfs_cntob(1));
+		toread = MIN(off + resid, ntfs_cntob(1));
 
 		error = bread(vp, cn, ntfs_cntob(1), NOCRED, &bp);
 		if (error) {
@@ -187,7 +188,7 @@
 	vap->va_fsid = dev2udev(ip->i_dev);
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_mp->ntm_mode;
-	vap->va_nlink = ip->i_nlink;
+	vap->va_nlink = (ip->i_nlink || ip->i_flag & IN_LOADED ? ip->i_nlink : 1);
 	vap->va_uid = ip->i_mp->ntm_uid;
 	vap->va_gid = ip->i_mp->ntm_gid;
 	vap->va_rdev = 0;				/* XXX UNODEV ? */
@@ -285,7 +286,7 @@
 		(u_int32_t)bp->b_offset,(u_int32_t)bp->b_blkno,
 		(u_int32_t)bp->b_lblkno));
 
-	dprintf(("strategy: bcount: %d flags: 0x%lx\n", 
+	dprintf(("strategy: bcount: %d flags: 0x%x\n", 
 		(u_int32_t)bp->b_bcount,bp->b_flags));
 
 	if (bp->b_iocmd == BIO_READ) {
@@ -295,7 +296,7 @@
 			clrbuf(bp);
 			error = 0;
 		} else {
-			toread = min(bp->b_bcount,
+			toread = MIN(bp->b_bcount,
 				 fp->f_size-ntfs_cntob(bp->b_blkno));
 			dprintf(("ntfs_strategy: toread: %d, fsize: %d\n",
 				toread,(u_int32_t)fp->f_size));
@@ -321,7 +322,7 @@
 			bp->b_error = error = EFBIG;
 			bp->b_ioflags |= BIO_ERROR;
 		} else {
-			towrite = min(bp->b_bcount,
+			towrite = MIN(bp->b_bcount,
 				fp->f_size-ntfs_cntob(bp->b_blkno));
 			dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n",
 				towrite,(u_int32_t)fp->f_size));
@@ -367,7 +368,7 @@
 		return (EFBIG);
 	}
 
-	towrite = min(uio->uio_resid, fp->f_size - uio->uio_offset);
+	towrite = MIN(uio->uio_resid, fp->f_size - uio->uio_offset);
 
 	dprintf((", towrite: %d\n",(u_int32_t)towrite));
 
@@ -438,14 +439,14 @@
 		struct thread *a_td;
 	} */ *ap;
 {
-#if NTFS_DEBUG
+#ifdef NTFS_DEBUG
 	register struct vnode *vp = ap->a_vp;
 	register struct ntnode *ip = VTONT(vp);
 
 	printf("ntfs_open: %d\n",ip->i_number);
 #endif
 
-	vnode_create_vobject_off(ap->a_vp, VTOF(ap->a_vp)->f_size, ap->a_td);
+	vnode_create_vobject(ap->a_vp, VTOF(ap->a_vp)->f_size, ap->a_td);
 
 	/*
 	 * Files marked append-only must be opened for appending.
@@ -469,7 +470,7 @@
 		struct thread *a_td;
 	} */ *ap;
 {
-#if NTFS_DEBUG
+#ifdef NTFS_DEBUG
 	register struct vnode *vp = ap->a_vp;
 	register struct ntnode *ip = VTONT(vp);
 
@@ -731,6 +732,26 @@
 	/* NOTREACHED */
 }
 
+/* Fill in the file identifier for a_vp: its length and inode number. */
+int
+ntfs_vptofh(ap)
+	struct vop_vptofh_args /* {
+		struct vnode *a_vp;
+		struct fid *a_fhp;
+	} */ *ap;
+{
+	register struct ntnode *ntp;
+	register struct ntfid *ntfhp;
+
+	ddprintf(("ntfs_vptofh(): %p\n", ap->a_vp));
+	ntp = VTONT(ap->a_vp);
+	ntfhp = (struct ntfid *)ap->a_fhp;
+	ntfhp->ntfid_len = sizeof(struct ntfid);
+	ntfhp->ntfid_ino = ntp->i_number;
+	/* ntfhp->ntfid_gen = ntp->i_gen; */
+	return (0);
+}
+
 /*
  * Global vfs data structures
  */
@@ -752,4 +773,5 @@
 	.vop_reclaim =		ntfs_reclaim,
 	.vop_strategy =		ntfs_strategy,
 	.vop_write =		ntfs_write,
+	.vop_vptofh =		ntfs_vptofh,
 };
Index: ntfs_subr.h
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_subr.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/ntfs/ntfs_subr.h -L sys/fs/ntfs/ntfs_subr.h -u -r1.1.1.1 -r1.2
--- sys/fs/ntfs/ntfs_subr.h
+++ sys/fs/ntfs/ntfs_subr.h
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/ntfs/ntfs_subr.h,v 1.13 2003/09/26 20:26:23 fjoe Exp $
+ * $FreeBSD: src/sys/fs/ntfs/ntfs_subr.h,v 1.14 2006/11/20 19:28:36 le Exp $
  */
 
 #define	VA_LOADED		0x0001
@@ -45,8 +45,8 @@
 
 	u_int32_t		va_compression;
 	u_int32_t		va_compressalg;
-	u_int32_t		va_datalen;
-	u_int32_t		va_allocated;
+	u_int64_t		va_datalen;
+	u_int64_t		va_allocated;
 	cn_t	 		va_vcnstart;
 	cn_t	 		va_vcnend;
 	u_int16_t		va_index;
Index: nwfs_subr.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nwfs/nwfs_subr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/nwfs/nwfs_subr.c -L sys/fs/nwfs/nwfs_subr.c -u -r1.1.1.1 -r1.2
--- sys/fs/nwfs/nwfs_subr.c
+++ sys/fs/nwfs/nwfs_subr.c
@@ -29,14 +29,15 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/nwfs/nwfs_subr.c,v 1.13 2005/05/11 19:08:38 kan Exp $
+ * $FreeBSD: src/sys/fs/nwfs/nwfs_subr.c,v 1.17 2006/10/24 11:43:41 phk Exp $
  */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/clock.h>
 #include <sys/lockmgr.h>
 #include <sys/malloc.h>
-#include <machine/clock.h>
+#include <sys/clock.h>
 #include <sys/time.h>
 
 #include <netncp/ncp.h>
@@ -52,7 +53,7 @@
 
 #define NCP_INFOSZ	(sizeof(struct nw_entry_info) - 257)
 
-MALLOC_DEFINE(M_NWFSDATA, "NWFS data", "NWFS private data");
+MALLOC_DEFINE(M_NWFSDATA, "nwfs_data", "NWFS private data");
 
 static int
 ncp_extract_file_info(struct nwmount *nmp, struct ncp_rq *rqp,
@@ -523,61 +524,13 @@
 	return error;
 }
 
-/* 
- * Time & date conversion routines taken from msdosfs. Although leap
- * year calculation is bogus, it's sufficient before 2100 :)
- */
-/*
- * This is the format of the contents of the deTime field in the direntry
- * structure.
- * We don't use bitfields because we don't know how compilers for
- * arbitrary machines will lay them out.
- */
-#define DT_2SECONDS_MASK	0x1F	/* seconds divided by 2 */
-#define DT_2SECONDS_SHIFT	0
-#define DT_MINUTES_MASK		0x7E0	/* minutes */
-#define DT_MINUTES_SHIFT	5
-#define DT_HOURS_MASK		0xF800	/* hours */
-#define DT_HOURS_SHIFT		11
-
-/*
- * This is the format of the contents of the deDate field in the direntry
- * structure.
- */
-#define DD_DAY_MASK		0x1F	/* day of month */
-#define DD_DAY_SHIFT		0
-#define DD_MONTH_MASK		0x1E0	/* month */
-#define DD_MONTH_SHIFT		5
-#define DD_YEAR_MASK		0xFE00	/* year - 1980 */
-#define DD_YEAR_SHIFT		9
 /*
- * Total number of days that have passed for each month in a regular year.
+ * XXX: I think the timezone in struct nwfs_args is truly bogus, especially
+ * XXX: considering that nwfs_mount(8) picks this up from the kernel in
+ * XXX: the first place.  Since I can't test this, I won't attempt to fix it.
+ * XXX: /phk
  */
-static u_short regyear[] = {
-	31, 59, 90, 120, 151, 181,
-	212, 243, 273, 304, 334, 365
-};
 
-/*
- * Total number of days that have passed for each month in a leap year.
- */
-static u_short leapyear[] = {
-	31, 60, 91, 121, 152, 182,
-	213, 244, 274, 305, 335, 366
-};
-
-/*
- * Variables used to remember parts of the last time conversion.  Maybe we
- * can avoid a full conversion.
- */
-static u_long  lasttime;
-static u_long  lastday;
-static u_short lastddate;
-static u_short lastdtime;
-/*
- * Convert the unix version of time to dos's idea of time to be used in
- * file timestamps. The passed in unix time is assumed to be in GMT.
- */
 void
 ncp_unix2dostime(tsp, tzoff, ddp, dtp, dhp)
 	struct timespec *tsp;
@@ -586,79 +539,14 @@
 	u_int16_t *dtp;
 	u_int8_t *dhp;
 {
-	u_long t;
-	u_long days;
-	u_long inc;
-	u_long year;
-	u_long month;
-	u_short *months;
-
-	/*
-	 * If the time from the last conversion is the same as now, then
-	 * skip the computations and use the saved result.
-	 */
-	t = tsp->tv_sec - tzoff * 60 - tz_minuteswest * 60 -
-	    (wall_cmos_clock ? adjkerntz : 0);
-	t &= ~1;
-	if (lasttime != t) {
-		lasttime = t;
-		lastdtime = (((t / 2) % 30) << DT_2SECONDS_SHIFT)
-		    + (((t / 60) % 60) << DT_MINUTES_SHIFT)
-		    + (((t / 3600) % 24) << DT_HOURS_SHIFT);
-
-		/*
-		 * If the number of days since 1970 is the same as the last
-		 * time we did the computation then skip all this leap year
-		 * and month stuff.
-		 */
-		days = t / (24 * 60 * 60);
-		if (days != lastday) {
-			lastday = days;
-			for (year = 1970;; year++) {
-				inc = year & 0x03 ? 365 : 366;
-				if (days < inc)
-					break;
-				days -= inc;
-			}
-			months = year & 0x03 ? regyear : leapyear;
-			for (month = 0; days >= months[month]; month++)
-				;
-			if (month > 0)
-				days -= months[month - 1];
-			lastddate = ((days + 1) << DD_DAY_SHIFT)
-			    + ((month + 1) << DD_MONTH_SHIFT);
-			/*
-			 * Remember dos's idea of time is relative to 1980.
-			 * unix's is relative to 1970.  If somehow we get a
-			 * time before 1980 then don't give totally crazy
-			 * results.
-			 */
-			if (year > 1980)
-				lastddate += (year - 1980) << DD_YEAR_SHIFT;
-		}
-	}
-	if (dtp)
-		*dtp = lastdtime;
-	if (dhp)
-		*dhp = (tsp->tv_sec & 1) * 100 + tsp->tv_nsec / 10000000;
+	struct timespec t;
 
-	*ddp = lastddate;
+	t = *tsp;
+	/* Subtract the offsets that ncp_dos2unixtime() adds back. */
+	t.tv_sec -= tzoff * 60 + utc_offset();
+	timespec2fattime(&t, 1, ddp, dtp, dhp);
 }
 
-/*
- * The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that
- * interval there were 8 regular years and 2 leap years.
- */
-#define	SECONDSTO1980	(((8 * 365) + (2 * 366)) * (24 * 60 * 60))
-
-static u_short lastdosdate;
-static u_long  lastseconds;
-
-/*
- * Convert from dos' idea of time to unix'. This will probably only be
- * called from the stat(), and fstat() system calls and so probably need
- * not be too efficient.
- */
 void
 ncp_dos2unixtime(dd, dt, dh, tzoff, tsp)
 	u_int dd;
@@ -667,47 +555,7 @@
 	int tzoff;
 	struct timespec *tsp;
 {
-	u_long seconds;
-	u_long month;
-	u_long year;
-	u_long days;
-	u_short *months;
-
-	if (dd == 0) {
-		/*
-		 * Uninitialized field, return the epoch.
-		 */
-		tsp->tv_sec = 0;
-		tsp->tv_nsec = 0;
-		return;
-	}
-	seconds = (((dt & DT_2SECONDS_MASK) >> DT_2SECONDS_SHIFT) << 1)
-	    + ((dt & DT_MINUTES_MASK) >> DT_MINUTES_SHIFT) * 60
-	    + ((dt & DT_HOURS_MASK) >> DT_HOURS_SHIFT) * 3600
-	    + dh / 100;
-	/*
-	 * If the year, month, and day from the last conversion are the
-	 * same then use the saved value.
-	 */
-	if (lastdosdate != dd) {
-		lastdosdate = dd;
-		days = 0;
-		year = (dd & DD_YEAR_MASK) >> DD_YEAR_SHIFT;
-		days = year * 365;
-		days += year / 4 + 1;	/* add in leap days */
-		if ((year & 0x03) == 0)
-			days--;		/* if year is a leap year */
-		months = year & 0x03 ? regyear : leapyear;
-		month = (dd & DD_MONTH_MASK) >> DD_MONTH_SHIFT;
-		if (month < 1 || month > 12) {
-			month = 1;
-		}
-		if (month > 1)
-			days += months[month - 2];
-		days += ((dd & DD_DAY_MASK) >> DD_DAY_SHIFT) - 1;
-		lastseconds = (days * 24 * 60 * 60) + SECONDSTO1980;
-	}
-	tsp->tv_sec = seconds + lastseconds + tz_minuteswest * 60 +
-	    tzoff * 60 + (wall_cmos_clock ? adjkerntz : 0);
-	tsp->tv_nsec = (dh % 100) * 10000000;
+
+	fattime2timespec(dd, dt, dh, 1, tsp);
+	tsp->tv_sec += tzoff * 60 + utc_offset();
 }
Index: nwfs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nwfs/nwfs_vnops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/nwfs/nwfs_vnops.c -L sys/fs/nwfs/nwfs_vnops.c -u -r1.1.1.1 -r1.2
--- sys/fs/nwfs/nwfs_vnops.c
+++ sys/fs/nwfs/nwfs_vnops.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/nwfs/nwfs_vnops.c,v 1.41.2.1 2005/10/19 20:18:44 truckman Exp $
+ * $FreeBSD: src/sys/fs/nwfs/nwfs_vnops.c,v 1.42 2005/10/16 21:54:35 truckman Exp $
  */
 #include <sys/param.h>
 #include <sys/systm.h>
Index: nwfs_io.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nwfs/nwfs_io.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/nwfs/nwfs_io.c -L sys/fs/nwfs/nwfs_io.c -u -r1.1.1.1 -r1.2
--- sys/fs/nwfs/nwfs_io.c
+++ sys/fs/nwfs/nwfs_io.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/nwfs/nwfs_io.c,v 1.41 2005/03/13 12:18:24 jeff Exp $
+ * $FreeBSD: src/sys/fs/nwfs/nwfs_io.c,v 1.46 2007/06/01 14:33:10 kib Exp $
  *
  */
 #include <sys/param.h>
@@ -226,7 +226,7 @@
 		/* We can relay only on local information about file size,
 		 * because until file is closed NetWare will not return
 		 * the correct size. */
-#if notyet
+#ifdef notyet
 			nwfs_attr_cacheremove(vp);
 			error = VOP_GETATTR(vp, &vattr, cred, td);
 			if (error) return (error);
@@ -483,7 +483,7 @@
 			 * now tell them that it is ok to use.
 			 */
 			if (!error) {
-				if (m->flags & PG_WANTED)
+				if (m->oflags & VPO_WANTED)
 					vm_page_activate(m);
 				else
 					vm_page_deactivate(m);
@@ -524,7 +524,7 @@
 #ifndef NWFS_RWCACHE
 	td = curthread;			/* XXX */
 	cred = td->td_ucred;		/* XXX */
-	VOP_OPEN(vp, FWRITE, cred, td, -1);
+	VOP_OPEN(vp, FWRITE, cred, td, NULL);
 	error = vop_stdputpages(ap);
 	VOP_CLOSE(vp, FWRITE, cred, td);
 	return error;
@@ -541,7 +541,7 @@
 
 	td = curthread;			/* XXX */
 	cred = td->td_ucred;		/* XXX */
-/*	VOP_OPEN(vp, FWRITE, cred, td, -1);*/
+/*	VOP_OPEN(vp, FWRITE, cred, td, NULL);*/
 	np = VTONW(vp);
 	nmp = VFSTONWFS(vp->v_mount);
 	pages = ap->a_m;
@@ -611,6 +611,13 @@
 			return EINTR;
 	}
 	np->n_flag |= NFLUSHINPROG;
+
+	if (vp->v_bufobj.bo_object != NULL) {
+		VM_OBJECT_LOCK(vp->v_bufobj.bo_object);
+		vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
+		VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object);
+	}
+
 	error = vinvalbuf(vp, V_SAVE, td, PCATCH, 0);
 	while (error) {
 		if (error == ERESTART || error == EINTR) {
Index: nwfs_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nwfs/nwfs_vfsops.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/nwfs/nwfs_vfsops.c -L sys/fs/nwfs/nwfs_vfsops.c -u -r1.1.1.2 -r1.2
--- sys/fs/nwfs/nwfs_vfsops.c
+++ sys/fs/nwfs/nwfs_vfsops.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/nwfs/nwfs_vfsops.c,v 1.40.2.2 2006/02/10 00:37:57 yar Exp $
+ * $FreeBSD: src/sys/fs/nwfs/nwfs_vfsops.c,v 1.44 2006/09/26 04:12:46 tegge Exp $
  */
 
 #include <sys/param.h>
@@ -135,7 +135,7 @@
 	struct nwfs_args args; 	  /* will hold data from mount request */
 	int error;
 
-	error = copyin(data, (caddr_t)&args, sizeof(struct nwfs_args));
+	error = copyin(data, &args, sizeof(struct nwfs_args));
 	if (error)
 		return (error);
 
@@ -267,7 +267,9 @@
 	if (nmp->m.flags & NWFS_MOUNT_HAVE_NLS)
 		free(nmp->m.nls.to_lower, M_NWFSDATA);
 	free(nmp, M_NWFSDATA);
+	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 	return (error);
 }
 
@@ -373,7 +375,7 @@
 	struct mount *mp;
 	int cmd;
 	uid_t uid;
-	caddr_t arg;
+	void *arg;
 	struct thread *td;
 {
 	NCPVODEBUG("return EOPNOTSUPP\n");
Index: nwfs_node.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nwfs/nwfs_node.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/nwfs/nwfs_node.c -L sys/fs/nwfs/nwfs_node.c -u -r1.2 -r1.3
--- sys/fs/nwfs/nwfs_node.c
+++ sys/fs/nwfs/nwfs_node.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/nwfs/nwfs_node.c,v 1.36.2.1 2006/03/12 21:50:01 scottl Exp $
+ * $FreeBSD: src/sys/fs/nwfs/nwfs_node.c,v 1.39 2007/03/13 01:50:23 tegge Exp $
  */
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -64,8 +64,8 @@
 static u_long nwnodehash;
 static struct lock nwhashlock;
 
-static MALLOC_DEFINE(M_NWNODE, "NWFS node", "NWFS vnode private part");
-static MALLOC_DEFINE(M_NWFSHASH, "NWFS hash", "NWFS has table");
+static MALLOC_DEFINE(M_NWNODE, "nwfs_node", "NWFS vnode private part");
+static MALLOC_DEFINE(M_NWFSHASH, "nwfs_hash", "NWFS has table");
 
 static int nwfs_sysctl_vnprint(SYSCTL_HANDLER_ARGS);
 
@@ -174,6 +174,12 @@
 		FREE(np, M_NWNODE);
 		return (error);
 	}
+	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
+	if (error != 0) {
+		FREE(np, M_NWNODE);
+		*vpp = NULL;
+		return (error);
+	}
 	vp->v_data = np;
 	np->n_vnode = vp;
 	np->n_mount = nmp;
Index: portal_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/portalfs/portal_vfsops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/portalfs/portal_vfsops.c -L sys/fs/portalfs/portal_vfsops.c -u -r1.1.1.1 -r1.2
--- sys/fs/portalfs/portal_vfsops.c
+++ sys/fs/portalfs/portal_vfsops.c
@@ -31,7 +31,7 @@
  *
  *	@(#)portal_vfsops.c	8.11 (Berkeley) 5/14/95
  *
- * $FreeBSD: src/sys/fs/portalfs/portal_vfsops.c,v 1.57 2005/03/24 07:36:14 jeff Exp $
+ * $FreeBSD: src/sys/fs/portalfs/portal_vfsops.c,v 1.60 2007/03/13 01:50:23 tegge Exp $
  */
 
 /*
@@ -56,7 +56,7 @@
 
 #include <fs/portalfs/portal.h>
 
-static MALLOC_DEFINE(M_PORTALFSMNT, "PORTAL mount", "PORTAL mount structure");
+static MALLOC_DEFINE(M_PORTALFSMNT, "portal_mount", "PORTAL mount structure");
 
 static vfs_unmount_t	portal_unmount;
 static vfs_root_t	portal_root;
@@ -136,6 +136,13 @@
 		return (error);
 	}
 
+	error = insmntque(rvp, mp);	/* XXX: Too early for mpsafe fs */
+	if (error != 0) {
+		FREE(fmp, M_PORTALFSMNT);
+		FREE(pn, M_TEMP);
+		fdrop(fp, td);
+		return (error);
+	}
 	rvp->v_data = pn;
 	rvp->v_type = VDIR;
 	rvp->v_vflag |= VV_ROOT;
@@ -146,7 +153,9 @@
 	fhold(fp);
 	fmp->pm_server = fp;
 
+	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 	mp->mnt_data = (qaddr_t) fmp;
 	vfs_getnewfsid(mp);
 
Index: portal_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/portalfs/portal_vnops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/portalfs/portal_vnops.c -L sys/fs/portalfs/portal_vnops.c -u -r1.1.1.1 -r1.2
--- sys/fs/portalfs/portal_vnops.c
+++ sys/fs/portalfs/portal_vnops.c
@@ -31,7 +31,7 @@
  *
  *	@(#)portal_vnops.c	8.14 (Berkeley) 5/21/95
  *
- * $FreeBSD: src/sys/fs/portalfs/portal_vnops.c,v 1.70 2005/03/28 09:33:52 jeff Exp $
+ * $FreeBSD: src/sys/fs/portalfs/portal_vnops.c,v 1.73 2007/03/13 01:50:23 tegge Exp $
  */
 
 /*
@@ -52,7 +52,7 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/stat.h>
-#include <sys/sysproto.h>
+#include <sys/syscallsubr.h>
 #include <sys/systm.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
@@ -77,10 +77,8 @@
 	int fd;
 {
 	int error;
-	struct close_args ua;
 
-	ua.fd = fd;
-	error = close(td, &ua);
+	error = kern_close(td, fd);
 	/*
 	 * We should never get an error, and there isn't anything
 	 * we could do if we got one, so just print a message.
@@ -156,6 +154,11 @@
 
 	*vpp = fvp;
 	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, td);
+	error = insmntque(fvp, dvp->v_mount);
+	if (error != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
 	return (0);
 
 bad:;
@@ -233,7 +236,7 @@
 	/*
 	 * Can't be opened unless the caller is set up
 	 * to deal with the side effects.  Check for this
-	 * by testing whether the p_dupfd has been set.
+	 * by testing whether td_dupfd has been set.
 	 */
 	if (td->td_dupfd >= 0)
 		return (ENODEV);
Index: procfs_dbregs.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs_dbregs.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/procfs/procfs_dbregs.c -L sys/fs/procfs/procfs_dbregs.c -u -r1.1.1.2 -r1.2
--- sys/fs/procfs/procfs_dbregs.c
+++ sys/fs/procfs/procfs_dbregs.c
@@ -40,7 +40,7 @@
  *
  * From:
  *	$Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
- * $FreeBSD: src/sys/fs/procfs/procfs_dbregs.c,v 1.26 2005/06/30 07:49:21 peter Exp $
+ * $FreeBSD: src/sys/fs/procfs/procfs_dbregs.c,v 1.27 2007/04/15 13:24:03 des Exp $
  */
 
 #include "opt_compat.h"
@@ -95,6 +95,9 @@
 	int wrap32 = 0;
 #endif
 
+	if (uio->uio_offset != 0)
+		return (0);
+
 	PROC_LOCK(p);
 	KASSERT(p->p_lock > 0, ("proc not held"));
 	if (p_candebug(td, p) != 0) {
@@ -128,6 +131,5 @@
 	}
 	PROC_UNLOCK(p);
 
-	uio->uio_offset = 0;
 	return (error);
 }
Index: procfs.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/procfs/procfs.c -L sys/fs/procfs/procfs.c -u -r1.1.1.1 -r1.2
--- sys/fs/procfs/procfs.c
+++ sys/fs/procfs/procfs.c
@@ -37,7 +37,7 @@
  *
  *	@(#)procfs_vfsops.c	8.7 (Berkeley) 5/10/95
  *
- * $FreeBSD: src/sys/fs/procfs/procfs.c,v 1.12 2005/01/06 18:10:40 imp Exp $
+ * $FreeBSD: src/sys/fs/procfs/procfs.c,v 1.16 2007/03/12 12:16:52 des Exp $
  */
 
 #include <sys/param.h>
@@ -69,10 +69,18 @@
 {
 	char *fullpath = "unknown";
 	char *freepath = NULL;
+	struct vnode *textvp;
+	int err;
 
-	vn_lock(p->p_textvp, LK_EXCLUSIVE | LK_RETRY, td);
-	vn_fullpath(td, p->p_textvp, &fullpath, &freepath);
-	VOP_UNLOCK(p->p_textvp, 0, td);
+	textvp = p->p_textvp;
+	VI_LOCK(textvp);
+	vholdl(textvp);
+	err = vn_lock(textvp, LK_EXCLUSIVE | LK_INTERLOCK, td);
+	vdrop(textvp);
+	if (err)
+		return (err);
+	vn_fullpath(td, textvp, &fullpath, &freepath);
+	VOP_UNLOCK(textvp, 0, td);
 	sbuf_printf(sb, "%s", fullpath);
 	if (freepath)
 		free(freepath, M_TEMP);
@@ -98,9 +106,7 @@
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* XXX inefficient, split into separate functions */
-	if (p->p_flag & P_SUGID)
-		vap->va_mode = 0;
-	else if (strcmp(pn->pn_name, "ctl") == 0 ||
+	if (strcmp(pn->pn_name, "ctl") == 0 ||
 	    strcmp(pn->pn_name, "note") == 0 ||
 	    strcmp(pn->pn_name, "notepg") == 0)
 		vap->va_mode = 0200;
@@ -110,6 +116,9 @@
 	    strcmp(pn->pn_name, "fpregs") == 0)
 		vap->va_mode = 0600;
 
+	if ((p->p_flag & P_SUGID) && pn->pn_type != pfstype_procdir)
+		vap->va_mode = 0;
+
 	vap->va_uid = p->p_ucred->cr_uid;
 	vap->va_gid = p->p_ucred->cr_gid;
 
@@ -151,39 +160,39 @@
 	root = pi->pi_root;
 
 	pfs_create_link(root, "curproc", procfs_docurproc,
-	    NULL, NULL, 0);
+	    NULL, NULL, NULL, 0);
 
 	dir = pfs_create_dir(root, "pid",
-	    procfs_attr, NULL, PFS_PROCDEP);
+	    procfs_attr, NULL, NULL, PFS_PROCDEP);
 	pfs_create_file(dir, "cmdline", procfs_doproccmdline,
-	    NULL, NULL, PFS_RD);
+	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "ctl", procfs_doprocctl,
-	    procfs_attr, NULL, PFS_WR);
+	    procfs_attr, NULL, NULL, PFS_WR);
 	pfs_create_file(dir, "dbregs", procfs_doprocdbregs,
-	    procfs_attr, procfs_candebug, PFS_RDWR|PFS_RAW);
+	    procfs_attr, procfs_candebug, NULL, PFS_RDWR|PFS_RAW);
 	pfs_create_file(dir, "etype", procfs_doproctype,
-	    NULL, NULL, PFS_RD);
+	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "fpregs", procfs_doprocfpregs,
-	    procfs_attr, procfs_candebug, PFS_RDWR|PFS_RAW);
+	    procfs_attr, procfs_candebug, NULL, PFS_RDWR|PFS_RAW);
 	pfs_create_file(dir, "map", procfs_doprocmap,
-	    NULL, procfs_notsystem, PFS_RD);
+	    NULL, procfs_notsystem, NULL, PFS_RD);
 	node = pfs_create_file(dir, "mem", procfs_doprocmem,
-	    procfs_attr, procfs_candebug, PFS_RDWR|PFS_RAW);
+	    procfs_attr, procfs_candebug, NULL, PFS_RDWR|PFS_RAW);
 	node->pn_ioctl = procfs_ioctl;
 	node->pn_close = procfs_close;
 	pfs_create_file(dir, "note", procfs_doprocnote,
-	    procfs_attr, procfs_candebug, PFS_WR);
+	    procfs_attr, procfs_candebug, NULL, PFS_WR);
 	pfs_create_file(dir, "notepg", procfs_doprocnote,
-	    procfs_attr, procfs_candebug, PFS_WR);
+	    procfs_attr, procfs_candebug, NULL, PFS_WR);
 	pfs_create_file(dir, "regs", procfs_doprocregs,
-	    procfs_attr, procfs_candebug, PFS_RDWR|PFS_RAW);
+	    procfs_attr, procfs_candebug, NULL, PFS_RDWR|PFS_RAW);
 	pfs_create_file(dir, "rlimit", procfs_doprocrlimit,
-	    NULL, NULL, PFS_RD);
+	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "status", procfs_doprocstatus,
-	    NULL, NULL, PFS_RD);
+	    NULL, NULL, NULL, PFS_RD);
 
 	pfs_create_link(dir, "file", procfs_doprocfile,
-	    NULL, procfs_notsystem, 0);
+	    NULL, procfs_notsystem, NULL, 0);
 
 	return (0);
 }
Index: procfs_regs.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs_regs.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/procfs/procfs_regs.c -L sys/fs/procfs/procfs_regs.c -u -r1.1.1.2 -r1.2
--- sys/fs/procfs/procfs_regs.c
+++ sys/fs/procfs/procfs_regs.c
@@ -34,7 +34,7 @@
  *
  * From:
  *	$Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
- * $FreeBSD: src/sys/fs/procfs/procfs_regs.c,v 1.31 2005/06/30 07:49:21 peter Exp $
+ * $FreeBSD: src/sys/fs/procfs/procfs_regs.c,v 1.32 2007/04/15 13:24:03 des Exp $
  */
 
 #include "opt_compat.h"
@@ -89,8 +89,11 @@
 	int wrap32 = 0;
 #endif
 
+	if (uio->uio_offset != 0)
+		return (0);
+
 	PROC_LOCK(p);
-	KASSERT(p->p_lock > 0, ("proc not held"));
+	PROC_ASSERT_HELD(p);
 	if (p_candebug(td, p)) {
 		PROC_UNLOCK(p);
 		return (EPERM);
@@ -122,6 +125,5 @@
 	}
 	PROC_UNLOCK(p);
 
-	uio->uio_offset = 0;
 	return (error);
 }
Index: procfs_ctl.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs_ctl.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/procfs/procfs_ctl.c -L sys/fs/procfs/procfs_ctl.c -u -r1.2 -r1.3
--- sys/fs/procfs/procfs_ctl.c
+++ sys/fs/procfs/procfs_ctl.c
@@ -34,7 +34,7 @@
  *
  * From:
  *	$Id: procfs_ctl.c,v 1.51 2003/12/07 17:40:00 des Exp $
- * $FreeBSD: src/sys/fs/procfs/procfs_ctl.c,v 1.53.2.1 2006/03/01 20:52:10 jhb Exp $
+ * $FreeBSD: src/sys/fs/procfs/procfs_ctl.c,v 1.56 2007/06/05 00:00:51 jeff Exp $
  */
 
 #include <sys/param.h>
@@ -215,7 +215,7 @@
 		p->p_flag &= ~P_TRACED;
 
 		/* remove pending SIGTRAP, else the process will die */
-		SIGDELSET(p->p_siglist, SIGTRAP);
+		sigqueue_delete_proc(p, SIGTRAP);
 		PROC_UNLOCK(p);
 
 		/* give process back to original parent */
@@ -286,9 +286,9 @@
 		panic("procfs_control");
 	}
 
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	thread_unsuspend(p); /* If it can run, let it do so. */
-	mtx_unlock_spin(&sched_lock);
+	PROC_SUNLOCK(p);
 	return (0);
 }
 
@@ -344,9 +344,9 @@
 #endif
 				/* XXXKSE: */
 				p->p_flag &= ~P_STOPPED_SIG;
-				mtx_lock_spin(&sched_lock);
+				PROC_SLOCK(p);
 				thread_unsuspend(p);
-				mtx_unlock_spin(&sched_lock);
+				PROC_SUNLOCK(p);
 			} else
 				psignal(p, nm->nm_val);
 			PROC_UNLOCK(p);
Index: procfs_fpregs.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs_fpregs.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/procfs/procfs_fpregs.c -L sys/fs/procfs/procfs_fpregs.c -u -r1.1.1.2 -r1.2
--- sys/fs/procfs/procfs_fpregs.c
+++ sys/fs/procfs/procfs_fpregs.c
@@ -34,7 +34,7 @@
  *
  * From:
  *	$Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
- * $FreeBSD: src/sys/fs/procfs/procfs_fpregs.c,v 1.32 2005/06/30 07:49:21 peter Exp $
+ * $FreeBSD: src/sys/fs/procfs/procfs_fpregs.c,v 1.33 2007/04/15 13:29:36 des Exp $
  */
 
 #include "opt_compat.h"
@@ -89,6 +89,9 @@
 	int wrap32 = 0;
 #endif
 
+	if (uio->uio_offset != 0)
+		return (0);
+
 	PROC_LOCK(p);
 	KASSERT(p->p_lock > 0, ("proc not held"));
 	if (p_candebug(td, p)) {
@@ -122,6 +125,5 @@
 	}
 	PROC_UNLOCK(p);
 
-	uio->uio_offset = 0;
 	return (error);
 }
Index: procfs_status.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs_status.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/procfs/procfs_status.c -L sys/fs/procfs/procfs_status.c -u -r1.1.1.2 -r1.2
--- sys/fs/procfs/procfs_status.c
+++ sys/fs/procfs/procfs_status.c
@@ -34,7 +34,7 @@
  *
  * From:
  *	$Id: procfs_status.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
- * $FreeBSD: src/sys/fs/procfs/procfs_status.c,v 1.56 2005/03/15 11:05:11 phk Exp $
+ * $FreeBSD: src/sys/fs/procfs/procfs_status.c,v 1.62 2007/09/17 05:31:39 jeff Exp $
  */
 
 #include <sys/param.h>
@@ -112,10 +112,12 @@
 		sbuf_printf(sb, "noflags");
 	}
 
-	mtx_lock_spin(&sched_lock);
+#ifdef KSE
 	if (p->p_flag & P_SA)
 		wmesg = "-kse- ";
-	else {
+	else
+#endif
+	{
 		tdfirst = FIRST_THREAD_IN_PROC(p);
 		if (tdfirst->td_wchan != NULL) {
 			KASSERT(tdfirst->td_wmesg != NULL,
@@ -124,21 +126,21 @@
 		} else
 			wmesg = "nochan";
 	}
-	mtx_unlock_spin(&sched_lock);
 
-	if (p->p_sflag & PS_INMEM) {
+	if (p->p_flag & P_INMEM) {
 		struct timeval start, ut, st;
 
+		PROC_SLOCK(p);
 		calcru(p, &ut, &st);
+		PROC_SUNLOCK(p);
 		start = p->p_stats->p_start;
 		timevaladd(&start, &boottime);
-		sbuf_printf(sb, " %ld,%ld %ld,%ld %ld,%ld",
-		    start.tv_sec, start.tv_usec,
-		    ut.tv_sec, ut.tv_usec,
-		    st.tv_sec, st.tv_usec);
-	} else {
+		sbuf_printf(sb, " %jd,%ld %jd,%ld %jd,%ld",
+		    (intmax_t)start.tv_sec, start.tv_usec,
+		    (intmax_t)ut.tv_sec, ut.tv_usec,
+		    (intmax_t)st.tv_sec, st.tv_usec);
+	} else
 		sbuf_printf(sb, " -1,-1 -1,-1 -1,-1");
-	}
 
 	sbuf_printf(sb, " %s", wmesg);
 
Index: procfs_map.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs_map.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/procfs/procfs_map.c -L sys/fs/procfs/procfs_map.c -u -r1.1.1.1 -r1.2
--- sys/fs/procfs/procfs_map.c
+++ sys/fs/procfs/procfs_map.c
@@ -32,7 +32,7 @@
  *
  *	@(#)procfs_status.c	8.3 (Berkeley) 2/17/94
  *
- * $FreeBSD: src/sys/fs/procfs/procfs_map.c,v 1.38 2005/06/30 07:49:21 peter Exp $
+ * $FreeBSD: src/sys/fs/procfs/procfs_map.c,v 1.40 2007/04/23 06:12:24 alc Exp $
  */
 
 #include "opt_compat.h"
@@ -42,6 +42,7 @@
 #include <sys/lock.h>
 #include <sys/filedesc.h>
 #include <sys/malloc.h>
+#include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/uio.h>
@@ -81,18 +82,17 @@
 procfs_doprocmap(PFS_FILL_ARGS)
 {
 	int len;
-	int error;
+	int error, vfslocked;
 	vm_map_t map = &p->p_vmspace->vm_map;
-	pmap_t pmap = vmspace_pmap(p->p_vmspace);
-	vm_map_entry_t entry;
+	vm_map_entry_t entry, tmp_entry;
+	struct vnode *vp;
 	char mebuffer[MEBUFFERSIZE];
 	char *fullpath, *freepath;
+	unsigned int last_timestamp;
 #ifdef COMPAT_IA32
 	int wrap32 = 0;
 #endif
 
-	GIANT_REQUIRED;
-
 	PROC_LOCK(p);
 	error = p_candebug(td, p);
 	PROC_UNLOCK(p);
@@ -112,9 +112,8 @@
                 wrap32 = 1;
         }
 #endif
-	error = 0;
-	if (map != &curthread->td_proc->p_vmspace->vm_map)
-		vm_map_lock_read(map);
+
+	vm_map_lock_read(map);
 	for (entry = map->header.next;
 		((uio->uio_resid > 0) && (entry != &map->header));
 		entry = entry->next) {
@@ -127,22 +126,29 @@
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
+		privateresident = 0;
 		obj = entry->object.vm_object;
-		if (obj && (obj->shadow_count == 1))
-			privateresident = obj->resident_page_count;
-		else
-			privateresident = 0;
+		if (obj != NULL) {
+			VM_OBJECT_LOCK(obj);
+			if (obj->shadow_count == 1)
+				privateresident = obj->resident_page_count;
+		}
 
 		resident = 0;
 		addr = entry->start;
 		while (addr < entry->end) {
-			if (pmap_extract( pmap, addr))
+			if (pmap_extract(map->pmap, addr))
 				resident++;
 			addr += PAGE_SIZE;
 		}
 
-		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object)
+		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
+			if (tobj != obj)
+				VM_OBJECT_LOCK(tobj);
+			if (lobj != obj)
+				VM_OBJECT_UNLOCK(lobj);
 			lobj = tobj;
+		}
 
 		freepath = NULL;
 		fullpath = "-";
@@ -151,25 +157,36 @@
 			default:
 			case OBJT_DEFAULT:
 				type = "default";
+				vp = NULL;
 				break;
 			case OBJT_VNODE:
 				type = "vnode";
-				vn_fullpath(td,
-				    (struct vnode *)lobj->handle,
-				    &fullpath,
-				    &freepath);
+				vp = lobj->handle;
+				vref(vp);
 				break;
 			case OBJT_SWAP:
 				type = "swap";
+				vp = NULL;
 				break;
 			case OBJT_DEVICE:
 				type = "device";
+				vp = NULL;
 				break;
 			}
+			if (lobj != obj)
+				VM_OBJECT_UNLOCK(lobj);
 
 			flags = obj->flags;
 			ref_count = obj->ref_count;
 			shadow_count = obj->shadow_count;
+			VM_OBJECT_UNLOCK(obj);
+			if (vp != NULL) {
+				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+				vn_fullpath(td, vp, &fullpath, &freepath);
+				vput(vp);
+				VFS_UNLOCK_GIANT(vfslocked);
+			}
 		} else {
 			type = "none";
 			flags = 0;
@@ -206,12 +223,22 @@
 			error = EFBIG;
 			break;
 		}
+		last_timestamp = map->timestamp;
+		vm_map_unlock_read(map);
 		error = uiomove(mebuffer, len, uio);
+		vm_map_lock_read(map);
 		if (error)
 			break;
+		if (last_timestamp + 1 != map->timestamp) {
+			/*
+			 * Look again for the entry because the map was
+			 * modified while it was unlocked.  Specifically,
+			 * the entry may have been clipped, merged, or deleted.
+			 */
+			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
+			entry = tmp_entry;
+		}
 	}
-	if (map != &curthread->td_proc->p_vmspace->vm_map)
-		vm_map_unlock_read(map);
-
+	vm_map_unlock_read(map);
 	return (error);
 }
Index: procfs_ioctl.c
===================================================================
RCS file: /home/cvs/src/sys/fs/procfs/procfs_ioctl.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/procfs/procfs_ioctl.c -L sys/fs/procfs/procfs_ioctl.c -u -r1.1.1.1 -r1.2
--- sys/fs/procfs/procfs_ioctl.c
+++ sys/fs/procfs/procfs_ioctl.c
@@ -25,7 +25,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *      $FreeBSD: src/sys/fs/procfs/procfs_ioctl.c,v 1.12 2005/06/30 07:49:21 peter Exp $
+ *      $FreeBSD: src/sys/fs/procfs/procfs_ioctl.c,v 1.19 2007/06/12 00:11:58 rwatson Exp $
  */
 
 #include "opt_compat.h"
@@ -34,6 +34,7 @@
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pioctl.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/signalvar.h>
 #include <sys/systm.h>
@@ -65,39 +66,70 @@
 	struct procfs_status32 *ps32;
 #endif
 	int error, flags, sig;
+#ifdef COMPAT_FREEBSD6
+	int ival;
+#endif
+
+	KASSERT(p != NULL,
+	    ("%s() called without a process", __func__));
+	PROC_LOCK_ASSERT(p, MA_OWNED);
 
-	PROC_LOCK(p);
 	error = 0;
 	switch (cmd) {
 #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
 	case _IOC(IOC_IN, 'p', 1, 0):
 #endif
+#ifdef COMPAT_FREEBSD6
+	case _IO('p', 1):
+		ival = IOCPARM_IVAL(data);
+		data = &ival;
+#endif
 	case PIOCBIS:
-		p->p_stops |= *(uintptr_t *)data;
+		p->p_stops |= *(unsigned int *)data;
 		break;
 #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
 	case _IOC(IOC_IN, 'p', 2, 0):
 #endif
+#ifdef COMPAT_FREEBSD6
+	case _IO('p', 2):
+		ival = IOCPARM_IVAL(data);
+		data = &ival;
+#endif
 	case PIOCBIC:
-		p->p_stops &= ~*(uintptr_t *)data;
+		p->p_stops &= ~*(unsigned int *)data;
 		break;
 #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
 	case _IOC(IOC_IN, 'p', 3, 0):
 #endif
+#ifdef COMPAT_FREEBSD6
+	case _IO('p', 3):
+		ival = IOCPARM_IVAL(data);
+		data = &ival;
+#endif
 	case PIOCSFL:
-		flags = *(uintptr_t *)data;
-		if (flags & PF_ISUGID && (error = suser(td)) != 0)
-			break;
+		flags = *(unsigned int *)data;
+		if (flags & PF_ISUGID) {
+			/*
+			 * XXXRW: Is this specific check required here, given
+			 * that p_candebug() should implement it, or are other
+			 * checks missing?
+			 */
+			error = priv_check(td, PRIV_DEBUG_SUGID);
+			if (error)
+				break;
+		}
 		p->p_pfsflags = flags;
 		break;
 	case PIOCGFL:
 		*(unsigned int *)data = p->p_pfsflags;
 		break;
 	case PIOCWAIT:
-		while (p->p_step == 0) {
+		while (p->p_step == 0 && (p->p_flag & P_WEXIT) == 0) {
 			/* sleep until p stops */
+			_PHOLD(p);
 			error = msleep(&p->p_stype, &p->p_mtx,
 			    PWAIT|PCATCH, "pioctl", 0);
+			_PRELE(p);
 			if (error != 0)
 				break;
 		}
@@ -112,10 +144,12 @@
 		break;
 #ifdef COMPAT_IA32
 	case PIOCWAIT32:
-		while (p->p_step == 0) {
+		while (p->p_step == 0 && (p->p_flag & P_WEXIT) == 0) {
 			/* sleep until p stops */
+			_PHOLD(p);
 			error = msleep(&p->p_stype, &p->p_mtx,
 			    PWAIT|PCATCH, "pioctl", 0);
+			_PRELE(p);
 			if (error != 0)
 				break;
 		}
@@ -132,10 +166,15 @@
 #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
 	case _IOC(IOC_IN, 'p', 5, 0):
 #endif
+#ifdef COMPAT_FREEBSD6
+	case _IO('p', 5):
+		ival = IOCPARM_IVAL(data);
+		data = &ival;
+#endif
 	case PIOCCONT:
 		if (p->p_step == 0)
 			break;
-		sig = *(uintptr_t *)data;
+		sig = *(unsigned int *)data;
 		if (sig != 0 && !_SIG_VALID(sig)) {
 			error = EINVAL;
 			break;
@@ -145,9 +184,9 @@
 		if (P_SHOULDSTOP(p)) {
 			p->p_xstat = sig;
 			p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG);
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			thread_unsuspend(p);
-			mtx_unlock_spin(&sched_lock);
+			PROC_SUNLOCK(p);
 		} else if (sig)
 			psignal(p, sig);
 #else
@@ -160,7 +199,6 @@
 	default:
 		error = (ENOTTY);
 	}
-	PROC_UNLOCK(p);
 
 	return (error);
 }
Index: pseudofs.h
===================================================================
RCS file: /home/cvs/src/sys/fs/pseudofs/pseudofs.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/pseudofs/pseudofs.h -L sys/fs/pseudofs/pseudofs.h -u -r1.1.1.1 -r1.2
--- sys/fs/pseudofs/pseudofs.h
+++ sys/fs/pseudofs/pseudofs.h
@@ -25,7 +25,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *      $FreeBSD: src/sys/fs/pseudofs/pseudofs.h,v 1.30 2005/03/24 07:36:15 jeff Exp $
+ *      $FreeBSD: src/sys/fs/pseudofs/pseudofs.h,v 1.36 2007/04/15 17:10:01 des Exp $
  */
 
 #ifndef _PSEUDOFS_H_INCLUDED
@@ -34,6 +34,7 @@
 /*
  * Opaque structures
  */
+struct mntarg;
 struct mount;
 struct nameidata;
 struct proc;
@@ -72,7 +73,6 @@
 #define	PFS_RAWWR	0x0008	/* raw writer */
 #define PFS_RAW		(PFS_RAWRD|PFS_RAWWR)
 #define PFS_PROCDEP	0x0010	/* process-dependent */
-#define PFS_DISABLED	0x8000	/* node is disabled */
 
 /*
  * Data structures
@@ -86,27 +86,35 @@
  */
 #define PFS_INIT_ARGS \
 	struct pfs_info *pi, struct vfsconf *vfc
+#define PFS_INIT_ARGNAMES \
+	pi, vfc
 #define PFS_INIT_PROTO(name) \
 	int name(PFS_INIT_ARGS);
 typedef int (*pfs_init_t)(PFS_INIT_ARGS);
 
 /*
  * Filler callback
+ * Called with proc held but unlocked
  */
 #define PFS_FILL_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	struct sbuf *sb, struct uio *uio
+#define PFS_FILL_ARGNAMES \
+	td, p, pn, sb, uio
 #define PFS_FILL_PROTO(name) \
 	int name(PFS_FILL_ARGS);
 typedef int (*pfs_fill_t)(PFS_FILL_ARGS);
 
 /*
  * Attribute callback
+ * Called with proc locked
  */
 struct vattr;
 #define PFS_ATTR_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	struct vattr *vap
+#define PFS_ATTR_ARGNAMES \
+	td, p, pn, vap
 #define PFS_ATTR_PROTO(name) \
 	int name(PFS_ATTR_ARGS);
 typedef int (*pfs_attr_t)(PFS_ATTR_ARGS);
@@ -115,30 +123,39 @@
 
 /*
  * Visibility callback
+ * Called with proc locked
  */
 #define PFS_VIS_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn
+#define PFS_VIS_ARGNAMES \
+	td, p, pn
 #define PFS_VIS_PROTO(name) \
 	int name(PFS_VIS_ARGS);
 typedef int (*pfs_vis_t)(PFS_VIS_ARGS);
 
 /*
  * Ioctl callback
+ * Called with proc locked
  */
 #define PFS_IOCTL_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	unsigned long cmd, void *data
+#define PFS_IOCTL_ARGNAMES \
+	td, p, pn, cmd, data
 #define PFS_IOCTL_PROTO(name) \
 	int name(PFS_IOCTL_ARGS);
 typedef int (*pfs_ioctl_t)(PFS_IOCTL_ARGS);
 
 /*
  * Getextattr callback
+ * Called with proc locked
  */
 #define PFS_GETEXTATTR_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	int attrnamespace, const char *name, struct uio *uio,	\
 	size_t *size, struct ucred *cred
+#define PFS_GETEXTATTR_ARGNAMES \
+	td, p, pn, attrnamespace, name, uio, size, cred
 #define PFS_GETEXTATTR_PROTO(name) \
 	int name(PFS_GETEXTATTR_ARGS);
 struct ucred;
@@ -146,52 +163,79 @@
 
 /*
  * Last-close callback
+ * Called with proc locked
  */
 #define PFS_CLOSE_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn
+#define PFS_CLOSE_ARGNAMES \
+	td, p, pn
 #define PFS_CLOSE_PROTO(name) \
 	int name(PFS_CLOSE_ARGS);
 typedef int (*pfs_close_t)(PFS_CLOSE_ARGS);
 
 /*
+ * Destroy callback
+ */
+#define PFS_DESTROY_ARGS \
+	struct pfs_node *pn
+#define PFS_DESTROY_ARGNAMES \
+	pn
+#define PFS_DESTROY_PROTO(name) \
+	int name(PFS_DESTROY_ARGS);
+typedef int (*pfs_destroy_t)(PFS_DESTROY_ARGS);
+
+/*
  * pfs_info: describes a pseudofs instance
+ *
+ * The pi_mutex is only used to avoid using the global subr_unit lock for
+ * unrhdr.  The rest of struct pfs_info is only modified while Giant is
+ * held (during vfs_init() and vfs_uninit()).
  */
 struct pfs_info {
 	char			 pi_name[PFS_FSNAMELEN];
 	pfs_init_t		 pi_init;
 	pfs_init_t		 pi_uninit;
-	/* members below this line aren't initialized */
+
+	/* members below this line are initialized at run time */
 	struct pfs_node		*pi_root;
-	/* currently, the mutex is only used to protect the bitmap */
 	struct mtx		 pi_mutex;
 	struct unrhdr		*pi_unrhdr;
 };
 
 /*
  * pfs_node: describes a node (file or directory) within a pseudofs
+ *
+ * - Fields marked (o) are protected by the node's own mutex.
+ * - Fields marked (p) are protected by the node's parent's mutex.
+ * - Remaining fields are not protected by any lock and are assumed to be
+ *   immutable once the node has been created.
+ *
+ * To prevent deadlocks, if a node's mutex is to be held at the same time
+ * as its parent's (e.g. when adding or removing nodes to a directory),
+ * the parent's mutex must always be acquired first.  Unfortunately, this
+ * is not enforceable by WITNESS.
  */
 struct pfs_node {
 	char			 pn_name[PFS_NAMELEN];
 	pfs_type_t		 pn_type;
-	union {
-		void		*_pn_dummy;
-		pfs_fill_t	 _pn_func;
-		struct pfs_node	*_pn_nodes;
-	} u1;
-#define pn_func		u1._pn_func
-#define pn_nodes	u1._pn_nodes
+	int			 pn_flags;
+	struct mtx		 pn_mutex;
+	void			*pn_data;		/* (o) */
+
+	pfs_fill_t		 pn_fill;
 	pfs_ioctl_t		 pn_ioctl;
 	pfs_close_t		 pn_close;
 	pfs_attr_t		 pn_attr;
 	pfs_vis_t		 pn_vis;
 	pfs_getextattr_t	 pn_getextattr;
-	void			*pn_data;
-	int			 pn_flags;
+	pfs_destroy_t		 pn_destroy;
 
 	struct pfs_info		*pn_info;
-	struct pfs_node		*pn_parent;
-	struct pfs_node		*pn_next;
-	u_int32_t		 pn_fileno;
+	u_int32_t		 pn_fileno;		/* (o) */
+
+	struct pfs_node		*pn_parent;		/* (o) */
+	struct pfs_node		*pn_nodes;		/* (o) */
+	struct pfs_node		*pn_next;		/* (p) */
 };
 
 /*
@@ -199,6 +243,8 @@
  */
 int		 pfs_mount	(struct pfs_info *pi, struct mount *mp,
 				 struct thread *td);
+int		 pfs_cmount	(struct mntarg *ma, void *data, int flags,
+				 struct thread *td);
 int		 pfs_unmount	(struct mount *mp, int mntflags,
 				 struct thread *td);
 int		 pfs_root	(struct mount *mp, int flags,
@@ -212,16 +258,18 @@
  * Directory structure construction and manipulation
  */
 struct pfs_node	*pfs_create_dir	(struct pfs_node *parent, const char *name,
-				 pfs_attr_t attr, pfs_vis_t vis, int flags);
+				 pfs_attr_t attr, pfs_vis_t vis,
+				 pfs_destroy_t destroy, int flags);
 struct pfs_node	*pfs_create_file(struct pfs_node *parent, const char *name,
 				 pfs_fill_t fill, pfs_attr_t attr,
-				 pfs_vis_t vis, int flags);
+				 pfs_vis_t vis, pfs_destroy_t destroy,
+				 int flags);
 struct pfs_node	*pfs_create_link(struct pfs_node *parent, const char *name,
 				 pfs_fill_t fill, pfs_attr_t attr,
-				 pfs_vis_t vis, int flags);
+				 pfs_vis_t vis, pfs_destroy_t destroy,
+				 int flags);
 struct pfs_node	*pfs_find_node	(struct pfs_node *parent, const char *name);
-int		 pfs_disable	(struct pfs_node *pn);
-int		 pfs_enable	(struct pfs_node *pn);
+void		 pfs_purge	(struct pfs_node *pn);
 int		 pfs_destroy	(struct pfs_node *pn);
 
 /*
@@ -251,6 +299,7 @@
 }									\
 									\
 static struct vfsops name##_vfsops = {					\
+	.vfs_cmount =		pfs_cmount,				\
 	.vfs_init =		_##name##_init,				\
 	.vfs_mount =		_##name##_mount,			\
 	.vfs_root =		pfs_root,				\
Index: pseudofs_vncache.c
===================================================================
RCS file: /home/cvs/src/sys/fs/pseudofs/pseudofs_vncache.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/pseudofs/pseudofs_vncache.c -L sys/fs/pseudofs/pseudofs_vncache.c -u -r1.1.1.1 -r1.2
--- sys/fs/pseudofs/pseudofs_vncache.c
+++ sys/fs/pseudofs/pseudofs_vncache.c
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs_vncache.c,v 1.32 2005/07/07 07:33:10 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs_vncache.c,v 1.38 2007/04/23 19:17:01 des Exp $");
 
 #include "opt_pseudofs.h"
 
@@ -83,7 +83,9 @@
 void
 pfs_vncache_load(void)
 {
-	mtx_init(&pfs_vncache_mutex, "pseudofs_vncache", NULL, MTX_DEF);
+
+	mtx_assert(&Giant, MA_OWNED);
+	mtx_init(&pfs_vncache_mutex, "pfs_vncache", NULL, MTX_DEF);
 	pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
@@ -94,10 +96,11 @@
 void
 pfs_vncache_unload(void)
 {
+
+	mtx_assert(&Giant, MA_OWNED);
 	EVENTHANDLER_DEREGISTER(process_exit, pfs_exit_tag);
-	if (pfs_vncache_entries != 0)
-		printf("pfs_vncache_unload(): %d entries remaining\n",
-		    pfs_vncache_entries);
+	KASSERT(pfs_vncache_entries == 0,
+	    ("%d vncache entries remaining", pfs_vncache_entries));
 	mtx_destroy(&pfs_vncache_mutex);
 }
 
@@ -109,28 +112,37 @@
 		  struct pfs_node *pn, pid_t pid)
 {
 	struct pfs_vdata *pvd;
+	struct vnode *vp;
 	int error;
 
 	/*
 	 * See if the vnode is in the cache.
 	 * XXX linear search is not very efficient.
 	 */
+retry:
 	mtx_lock(&pfs_vncache_mutex);
 	for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) {
 		if (pvd->pvd_pn == pn && pvd->pvd_pid == pid &&
 		    pvd->pvd_vnode->v_mount == mp) {
-			if (vget(pvd->pvd_vnode, 0, curthread) == 0) {
+			vp = pvd->pvd_vnode;
+			VI_LOCK(vp);
+			mtx_unlock(&pfs_vncache_mutex);
+			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) {
 				++pfs_vncache_hits;
-				*vpp = pvd->pvd_vnode;
-				mtx_unlock(&pfs_vncache_mutex);
-				/* XXX see comment at top of pfs_lookup() */
-				cache_purge(*vpp);
-				vn_lock(*vpp, LK_RETRY | LK_EXCLUSIVE,
-				    curthread);
+				*vpp = vp;
+				/*
+				 * Some callers cache_enter(vp) later, so
+				 * we have to make sure it's not in the
+				 * VFS cache so it doesn't get entered
+				 * twice.  A better solution would be to
+				 * make pfs_vncache_alloc() responsible
+				 * for entering the vnode in the VFS
+				 * cache.
+				 */
+				cache_purge(vp);
 				return (0);
 			}
-			/* XXX if this can happen, we're in trouble */
-			break;
+			goto retry;
 		}
 	}
 	mtx_unlock(&pfs_vncache_mutex);
@@ -138,10 +150,15 @@
 
 	/* nope, get a new one */
 	MALLOC(pvd, struct pfs_vdata *, sizeof *pvd, M_PFSVNCACHE, M_WAITOK);
+	mtx_lock(&pfs_vncache_mutex);
 	if (++pfs_vncache_entries > pfs_vncache_maxentries)
 		pfs_vncache_maxentries = pfs_vncache_entries;
+	mtx_unlock(&pfs_vncache_mutex);
 	error = getnewvnode("pseudofs", mp, &pfs_vnodeops, vpp);
 	if (error) {
+		mtx_lock(&pfs_vncache_mutex);
+		--pfs_vncache_entries;
+		mtx_unlock(&pfs_vncache_mutex);
 		FREE(pvd, M_PFSVNCACHE);
 		return (error);
 	}
@@ -179,6 +196,17 @@
 	if ((pn->pn_flags & PFS_PROCDEP) != 0)
 		(*vpp)->v_vflag |= VV_PROCDEP;
 	pvd->pvd_vnode = *vpp;
+	(*vpp)->v_vnlock->lk_flags |= LK_CANRECURSE;
+	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread);
+	error = insmntque(*vpp, mp);
+	if (error != 0) {
+		mtx_lock(&pfs_vncache_mutex);
+		--pfs_vncache_entries;
+		mtx_unlock(&pfs_vncache_mutex);
+		FREE(pvd, M_PFSVNCACHE);
+		*vpp = NULLVP;
+		return (error);
+	}
 	mtx_lock(&pfs_vncache_mutex);
 	pvd->pvd_prev = NULL;
 	pvd->pvd_next = pfs_vncache;
@@ -186,8 +214,6 @@
 		pvd->pvd_next->pvd_prev = pvd;
 	pfs_vncache = pvd;
 	mtx_unlock(&pfs_vncache_mutex);
-        (*vpp)->v_vnlock->lk_flags |= LK_CANRECURSE;
-	vn_lock(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
 	return (0);
 }
 
@@ -208,47 +234,37 @@
 		pvd->pvd_prev->pvd_next = pvd->pvd_next;
 	else
 		pfs_vncache = pvd->pvd_next;
+	--pfs_vncache_entries;
 	mtx_unlock(&pfs_vncache_mutex);
 
-	--pfs_vncache_entries;
 	FREE(pvd, M_PFSVNCACHE);
 	vp->v_data = NULL;
 	return (0);
 }
 
 /*
- * Free all vnodes associated with a defunct process
+ * Purge the cache of dead entries
  *
- * XXXRW: It is unfortunate that pfs_exit() always acquires and releases two
- * mutexes (one of which is Giant) for every process exit, even if procfs
- * isn't mounted.
+ * This is extremely inefficient due to the fact that vgone() not only
+ * indirectly modifies the vnode cache, but may also sleep.  We can
+ * neither hold pfs_vncache_mutex across a vgone() call, nor make any
+ * assumptions about the state of the cache after vgone() returns.  In
+ * consequence, we must start over after every vgone() call, and keep
+ * trying until we manage to traverse the entire cache.
+ *
+ * The only way to improve this situation is to change the data structure
+ * used to implement the cache.
  */
-static void
-pfs_exit(void *arg, struct proc *p)
+void
+pfs_purge(struct pfs_node *pn)
 {
 	struct pfs_vdata *pvd;
 	struct vnode *vnp;
 
-	if (pfs_vncache == NULL)
-		return;
-	mtx_lock(&Giant);
-	/*
-	 * This is extremely inefficient due to the fact that vgone() not
-	 * only indirectly modifies the vnode cache, but may also sleep.
-	 * We can neither hold pfs_vncache_mutex across a vgone() call,
-	 * nor make any assumptions about the state of the cache after
-	 * vgone() returns.  In consequence, we must start over after
-	 * every vgone() call, and keep trying until we manage to traverse
-	 * the entire cache.
-	 *
-	 * The only way to improve this situation is to change the data
-	 * structure used to implement the cache.  An obvious choice in
-	 * this particular case would be a BST sorted by PID.
-	 */
 	mtx_lock(&pfs_vncache_mutex);
 	pvd = pfs_vncache;
 	while (pvd != NULL) {
-		if (pvd->pvd_pid == p->p_pid) {
+		if (pvd->pvd_dead || (pn != NULL && pvd->pvd_pn == pn)) {
 			vnp = pvd->pvd_vnode;
 			vhold(vnp);
 			mtx_unlock(&pfs_vncache_mutex);
@@ -263,49 +279,30 @@
 		}
 	}
 	mtx_unlock(&pfs_vncache_mutex);
-	mtx_unlock(&Giant);
 }
 
 /*
- * Disable a pseudofs node, and free all vnodes associated with it
+ * Free all vnodes associated with a defunct process
+ *
+ * XXXRW: It is unfortunate that pfs_exit() always acquires and releases two
+ * mutexes (one of which is Giant) for every process exit, even if procfs
+ * isn't mounted.
  */
-int
-pfs_disable(struct pfs_node *pn)
+static void
+pfs_exit(void *arg, struct proc *p)
 {
 	struct pfs_vdata *pvd;
-	struct vnode *vnp;
+	int dead;
 
-	if (pn->pn_flags & PFS_DISABLED)
-		return (0);
-	pn->pn_flags |= PFS_DISABLED;
-	/* XXX see comment above nearly identical code in pfs_exit() */
+	if (pfs_vncache == NULL)
+		return;
+	mtx_lock(&Giant);
 	mtx_lock(&pfs_vncache_mutex);
-	pvd = pfs_vncache;
-	while (pvd != NULL) {
-		if (pvd->pvd_pn == pn) {
-			vnp = pvd->pvd_vnode;
-			vhold(vnp);
-			mtx_unlock(&pfs_vncache_mutex);
-			VOP_LOCK(vnp, LK_EXCLUSIVE, curthread);
-			vgone(vnp);
-			VOP_UNLOCK(vnp, 0, curthread);
-			vdrop(vnp);
-			mtx_lock(&pfs_vncache_mutex);
-			pvd = pfs_vncache;
-		} else {
-			pvd = pvd->pvd_next;
-		}
-	}
+	for (pvd = pfs_vncache, dead = 0; pvd != NULL; pvd = pvd->pvd_next)
+		if (pvd->pvd_pid == p->p_pid)
+			dead = pvd->pvd_dead = 1;
 	mtx_unlock(&pfs_vncache_mutex);
-	return (0);
-}
-
-/*
- * Re-enable a disabled pseudofs node
- */
-int
-pfs_enable(struct pfs_node *pn)
-{
-	pn->pn_flags &= ~PFS_DISABLED;
-	return (0);
+	if (dead)
+		pfs_purge(NULL);
+	mtx_unlock(&Giant);
 }
Index: pseudofs_internal.h
===================================================================
RCS file: /home/cvs/src/sys/fs/pseudofs/pseudofs_internal.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/pseudofs/pseudofs_internal.h -L sys/fs/pseudofs/pseudofs_internal.h -u -r1.1.1.1 -r1.2
--- sys/fs/pseudofs/pseudofs_internal.h
+++ sys/fs/pseudofs/pseudofs_internal.h
@@ -25,7 +25,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *      $FreeBSD: src/sys/fs/pseudofs/pseudofs_internal.h,v 1.4 2005/03/19 08:22:36 phk Exp $
+ *      $FreeBSD: src/sys/fs/pseudofs/pseudofs_internal.h,v 1.7 2007/04/15 17:10:01 des Exp $
  */
 
 #ifndef _PSEUDOFS_INTERNAL_H_INCLUDED
@@ -44,6 +44,7 @@
 	pid_t		 pvd_pid;
 	struct vnode	*pvd_vnode;
 	struct pfs_vdata*pvd_prev, *pvd_next;
+	int		 pvd_dead:1;
 };
 
 /*
@@ -60,7 +61,151 @@
  */
 void	 pfs_fileno_init	(struct pfs_info *);
 void	 pfs_fileno_uninit	(struct pfs_info *);
-void	 pfs_fileno_alloc	(struct pfs_info *, struct pfs_node *);
-void	 pfs_fileno_free	(struct pfs_info *, struct pfs_node *);
+void	 pfs_fileno_alloc	(struct pfs_node *);
+void	 pfs_fileno_free	(struct pfs_node *);
+
+/*
+ * Debugging
+ */
+#ifdef PSEUDOFS_TRACE
+extern int pfs_trace;
+
+#define PFS_TRACE(foo) \
+	do { \
+		if (pfs_trace) { \
+			printf("%s(): line %d: ", __func__, __LINE__); \
+			printf foo ; \
+			printf("\n"); \
+		} \
+	} while (0)
+#define PFS_RETURN(err) \
+	do { \
+		if (pfs_trace) { \
+			printf("%s(): line %d: returning %d\n", \
+			    __func__, __LINE__, err); \
+		} \
+		return (err); \
+	} while (0)
+#else
+#define PFS_TRACE(foo) \
+	do { /* nothing */ } while (0)
+#define PFS_RETURN(err) \
+	return (err)
+#endif
+
+/*
+ * Inline helpers for locking
+ */
+static inline void
+pfs_lock(struct pfs_node *pn)
+{
+
+	mtx_lock(&pn->pn_mutex);
+}
+
+static inline void
+pfs_unlock(struct pfs_node *pn)
+{
+
+	mtx_unlock(&pn->pn_mutex);
+}
+
+static inline void
+pfs_assert_owned(struct pfs_node *pn)
+{
+
+	mtx_assert(&pn->pn_mutex, MA_OWNED);
+}
+
+static inline void
+pfs_assert_not_owned(struct pfs_node *pn)
+{
+
+	mtx_assert(&pn->pn_mutex, MA_NOTOWNED);
+}
+
+static inline int
+pn_fill(PFS_FILL_ARGS)
+{
+
+	PFS_TRACE(("%s", pn->pn_name));
+	KASSERT(pn->pn_fill != NULL, ("%s(): no callback", __func__));
+	if (p != NULL) {
+		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+		PROC_ASSERT_HELD(p);
+	}
+	pfs_assert_not_owned(pn);
+	return ((pn->pn_fill)(PFS_FILL_ARGNAMES));
+}
+
+static inline int
+pn_attr(PFS_ATTR_ARGS)
+{
+
+	PFS_TRACE(("%s", pn->pn_name));
+	KASSERT(pn->pn_attr != NULL, ("%s(): no callback", __func__));
+	if (p != NULL)
+		PROC_LOCK_ASSERT(p, MA_OWNED);
+	pfs_assert_not_owned(pn);
+	return ((pn->pn_attr)(PFS_ATTR_ARGNAMES));
+}
+
+static inline int
+pn_vis(PFS_VIS_ARGS)
+{
+
+	PFS_TRACE(("%s", pn->pn_name));
+	KASSERT(pn->pn_vis != NULL, ("%s(): no callback", __func__));
+	KASSERT(p != NULL, ("%s(): no process", __func__));
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	pfs_assert_not_owned(pn);
+	return ((pn->pn_vis)(PFS_VIS_ARGNAMES));
+}
+
+static inline int
+pn_ioctl(PFS_IOCTL_ARGS)
+{
+
+	PFS_TRACE(("%s", pn->pn_name));
+	KASSERT(pn->pn_ioctl != NULL, ("%s(): no callback", __func__));
+	if (p != NULL)
+		PROC_LOCK_ASSERT(p, MA_OWNED);
+	pfs_assert_not_owned(pn);
+	return ((pn->pn_ioctl)(PFS_IOCTL_ARGNAMES));
+}
+
+static inline int
+pn_getextattr(PFS_GETEXTATTR_ARGS)
+{
+
+	PFS_TRACE(("%s", pn->pn_name));
+	KASSERT(pn->pn_getextattr != NULL, ("%s(): no callback", __func__));
+	if (p != NULL)
+		PROC_LOCK_ASSERT(p, MA_OWNED);
+	pfs_assert_not_owned(pn);
+	return ((pn->pn_getextattr)(PFS_GETEXTATTR_ARGNAMES));
+}
+
+static inline int
+pn_close(PFS_CLOSE_ARGS)
+{
+
+	PFS_TRACE(("%s", pn->pn_name));
+	KASSERT(pn->pn_close != NULL, ("%s(): no callback", __func__));
+	if (p != NULL)
+		PROC_LOCK_ASSERT(p, MA_OWNED);
+	pfs_assert_not_owned(pn);
+	return ((pn->pn_close)(PFS_CLOSE_ARGNAMES));
+}
+
+static inline int
+pn_destroy(PFS_DESTROY_ARGS)
+{
+
+	PFS_TRACE(("%s", pn->pn_name));
+	KASSERT(pn->pn_destroy != NULL, ("%s(): no callback", __func__));
+	pfs_assert_not_owned(pn);
+	return ((pn->pn_destroy)(PFS_DESTROY_ARGNAMES));
+}
 
 #endif
Index: pseudofs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/pseudofs/pseudofs_vnops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/pseudofs/pseudofs_vnops.c -L sys/fs/pseudofs/pseudofs_vnops.c -u -r1.2 -r1.3
--- sys/fs/pseudofs/pseudofs_vnops.c
+++ sys/fs/pseudofs/pseudofs_vnops.c
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs_vnops.c,v 1.56.2.3 2006/03/07 18:08:09 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs_vnops.c,v 1.65 2007/10/05 17:37:25 jhb Exp $");
 
 #include "opt_pseudofs.h"
 
@@ -52,40 +52,44 @@
 #include <fs/pseudofs/pseudofs.h>
 #include <fs/pseudofs/pseudofs_internal.h>
 
-#ifdef PSEUDOFS_TRACE
-static int pfs_trace;
-SYSCTL_INT(_vfs_pfs, OID_AUTO, trace, CTLFLAG_RW, &pfs_trace, 0,
-    "enable tracing of pseudofs vnode operations");
-
-#define PFS_TRACE(foo) \
-	do { \
-		if (pfs_trace) { \
-			printf("%s(): line %d: ", __func__, __LINE__); \
-			printf foo ; \
-			printf("\n"); \
-		} \
-	} while (0)
-#define PFS_RETURN(err) \
-	do { \
-		if (pfs_trace) { \
-			printf("%s(): line %d: returning %d\n", \
-			    __func__, __LINE__, err); \
-		} \
-		return (err); \
-	} while (0)
-#else
-#define PFS_TRACE(foo) \
-	do { /* nothing */ } while (0)
-#define PFS_RETURN(err) \
-	return (err)
-#endif
+/*
+ * Returns the fileno, adjusted for target pid
+ */
+static uint32_t
+pn_fileno(struct pfs_node *pn, pid_t pid)
+{
+
+	KASSERT(pn->pn_fileno > 0,
+	    ("%s(): no fileno allocated", __func__));
+	if (pid != NO_PID)
+		return (pn->pn_fileno * NO_PID + pid);
+	return (pn->pn_fileno);
+}
 
 /*
- * Returns non-zero if given file is visible to given process.  If the 'p'
- * parameter is non-NULL, then it will hold a pointer to the process the
- * given file belongs to on return and the process will be locked.
+ * Returns non-zero if given file is visible to given thread.
  */
 static int
+pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc)
+{
+	int visible;
+
+	if (proc == NULL)
+		return (0);
+
+	PROC_LOCK_ASSERT(proc, MA_OWNED);
+
+	visible = ((proc->p_flag & P_WEXIT) == 0);
+	if (visible)
+		visible = (p_cansee(td, proc) == 0);
+	if (visible && pn->pn_vis != NULL)
+		visible = pn_vis(td, proc, pn);
+	if (!visible)
+		return (0);
+	return (1);
+}
+
+static int
 pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid, struct proc **p)
 {
 	struct proc *proc;
@@ -93,30 +97,21 @@
 	PFS_TRACE(("%s (pid: %d, req: %d)",
 	    pn->pn_name, pid, td->td_proc->p_pid));
 
-	if (pn->pn_flags & PFS_DISABLED)
+	if (p)
+		*p = NULL;
+	if (pid == NO_PID)
+		PFS_RETURN (1);
+	if ((proc = pfind(pid)) == NULL)
 		PFS_RETURN (0);
-
-	if (pid != NO_PID) {
-		if ((proc = pfind(pid)) == NULL)
-			PFS_RETURN (0);
-		if (proc->p_flag & P_WEXIT) {
-			PROC_UNLOCK(proc);
-			PFS_RETURN (0);
-		}
-		if (p_cansee(td, proc) != 0 ||
-		    (pn->pn_vis != NULL && !(pn->pn_vis)(td, proc, pn))) {
-			PROC_UNLOCK(proc);
-			PFS_RETURN (0);
-		}
-		if (p) {
-			/* We return with the process locked to avoid races. */
+	if (pfs_visible_proc(td, pn, proc)) {
+		if (p)
 			*p = proc;
-		} else
+		else
 			PROC_UNLOCK(proc);
-	} else
-		if (p)
-			*p = NULL;
-	PFS_RETURN (1);
+		PFS_RETURN (1);
+	}
+	PROC_UNLOCK(proc);
+	PFS_RETURN (0);
 }
 
 /*
@@ -126,10 +121,12 @@
 pfs_access(struct vop_access_args *va)
 {
 	struct vnode *vn = va->a_vp;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct vattr vattr;
 	int error;
 
-	PFS_TRACE((((struct pfs_vdata *)vn->v_data)->pvd_pn->pn_name));
+	PFS_TRACE(("%s", pvd->pvd_pn->pn_name));
+	(void)pvd;
 
 	error = VOP_GETATTR(vn, &vattr, va->a_cred, va->a_td);
 	if (error)
@@ -146,12 +143,13 @@
 pfs_close(struct vop_close_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct proc *proc;
 	int error;
 
-	PFS_TRACE((pn->pn_name));
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	/*
 	 * Do nothing unless this is the last close and the node has a
@@ -160,12 +158,13 @@
 	if (vrefcnt(vn) > 1 || pn->pn_close == NULL)
 		PFS_RETURN (0);
 
-	if (pvd->pvd_pid != NO_PID)
+	if (pvd->pvd_pid != NO_PID) {
 		proc = pfind(pvd->pvd_pid);
-	else
+	} else {
 		proc = NULL;
+	}
 
-	error = (pn->pn_close)(va->a_td, proc, pn);
+	error = pn_close(va->a_td, proc, pn);
 
 	if (proc != NULL)
 		PROC_UNLOCK(proc);
@@ -180,20 +179,21 @@
 pfs_getattr(struct vop_getattr_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct vattr *vap = va->a_vap;
 	struct proc *proc;
 	int error = 0;
 
-	PFS_TRACE((pn->pn_name));
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (ENOENT);
 
 	VATTR_NULL(vap);
 	vap->va_type = vn->v_type;
-	vap->va_fileid = pn->pn_fileno;
+	vap->va_fileid = pn_fileno(pn, pvd->pvd_pid);
 	vap->va_flags = 0;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_bytes = vap->va_size = 0;
@@ -206,6 +206,11 @@
 	case pfstype_procdir:
 	case pfstype_root:
 	case pfstype_dir:
+#if 0
+		pfs_lock(pn);
+		/* compute link count */
+		pfs_unlock(pn);
+#endif
 		vap->va_mode = 0555;
 		break;
 	case pfstype_file:
@@ -222,7 +227,7 @@
 		vap->va_uid = proc->p_ucred->cr_ruid;
 		vap->va_gid = proc->p_ucred->cr_rgid;
 		if (pn->pn_attr != NULL)
-			error = (pn->pn_attr)(va->a_td, proc, pn, vap);
+			error = pn_attr(va->a_td, proc, pn, vap);
 		PROC_UNLOCK(proc);
 	} else {
 		vap->va_uid = 0;
@@ -239,12 +244,13 @@
 pfs_ioctl(struct vop_ioctl_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct proc *proc;
 	int error;
 
 	PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command));
+	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG)
 		PFS_RETURN (EINVAL);
@@ -259,15 +265,10 @@
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 
-	if (proc != NULL) {
-		_PHOLD(proc);
-		PROC_UNLOCK(proc);
-	}
-
-	error = (pn->pn_ioctl)(curthread, proc, pn, va->a_command, va->a_data);
+	error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data);
 
 	if (proc != NULL)
-		PRELE(proc);
+		PROC_UNLOCK(proc);
 
 	PFS_RETURN (error);
 }
@@ -279,12 +280,13 @@
 pfs_getextattr(struct vop_getextattr_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct proc *proc;
 	int error;
 
-	PFS_TRACE((pn->pn_name));
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	/*
 	 * This is necessary because either process' privileges may
@@ -293,23 +295,17 @@
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 
-	if (pn->pn_getextattr == NULL) {
-		if (proc != NULL)
-			PROC_UNLOCK(proc);
-		PFS_RETURN (EOPNOTSUPP);
-	}
-
-	if (proc != NULL) {
-		_PHOLD(proc);
-		PROC_UNLOCK(proc);
-	}
-
-	error = (pn->pn_getextattr)(curthread, proc, pn, va->a_attrnamespace,
-	    va->a_name, va->a_uio, va->a_size, va->a_cred);
+	if (pn->pn_getextattr == NULL)
+		error = EOPNOTSUPP;
+	else
+		error = pn_getextattr(curthread, proc, pn,
+		    va->a_attrnamespace, va->a_name, va->a_uio,
+		    va->a_size, va->a_cred);
 
 	if (proc != NULL)
-		PRELE(proc);
+		PROC_UNLOCK(proc);
 
+	pfs_unlock(pn);
 	PFS_RETURN (error);
 }
 
@@ -322,14 +318,15 @@
 	struct vnode *vn = va->a_dvp;
 	struct vnode **vpp = va->a_vpp;
 	struct componentname *cnp = va->a_cnp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	struct pfs_node *pn, *pdn = NULL;
 	pid_t pid = pvd->pvd_pid;
 	char *pname;
-	int error, i, namelen;
+	int error, i, namelen, visible;
 
 	PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr));
+	pfs_assert_not_owned(pd);
 
 	if (vn->v_type != VDIR)
 		PFS_RETURN (ENOTDIR);
@@ -370,7 +367,8 @@
 		if (pd->pn_type == pfstype_root)
 			PFS_RETURN (EIO);
 		VOP_UNLOCK(vn, 0, cnp->cn_thread);
-		KASSERT(pd->pn_parent, ("non-root directory has no parent"));
+		KASSERT(pd->pn_parent != NULL,
+		    ("%s(): non-root directory has no parent", __func__));
 		/*
 		 * This one is tricky.  Descendents of procdir nodes
 		 * inherit their parent's process affinity, but
@@ -382,17 +380,23 @@
 		 */
 		if (pd->pn_type == pfstype_procdir)
 			pid = NO_PID;
+		pfs_lock(pd);
 		pn = pd->pn_parent;
+		pfs_unlock(pd);
 		goto got_pnode;
 	}
 
+	pfs_lock(pd);
+
 	/* named node */
 	for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next)
 		if (pn->pn_type == pfstype_procdir)
 			pdn = pn;
 		else if (pn->pn_name[namelen] == '\0' &&
-		    bcmp(pname, pn->pn_name, namelen) == 0)
+		    bcmp(pname, pn->pn_name, namelen) == 0) {
+			pfs_unlock(pd);
 			goto got_pnode;
+		}
 
 	/* process dependent node */
 	if ((pn = pdn) != NULL) {
@@ -400,15 +404,21 @@
 		for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i)
 			if ((pid = pid * 10 + pname[i] - '0') > PID_MAX)
 				break;
-		if (i == cnp->cn_namelen)
+		if (i == cnp->cn_namelen) {
+			pfs_unlock(pd);
 			goto got_pnode;
+		}
 	}
 
+	pfs_unlock(pd);
+
 	PFS_RETURN (ENOENT);
+
  got_pnode:
-	if (pn != pd->pn_parent && !pn->pn_parent)
-		pn->pn_parent = pd;
-	if (!pfs_visible(curthread, pn, pvd->pvd_pid, NULL)) {
+	pfs_assert_not_owned(pd);
+	pfs_assert_not_owned(pn);
+	visible = pfs_visible(curthread, pn, pid, NULL);
+	if (!visible) {
 		error = ENOENT;
 		goto failed;
 	}
@@ -435,24 +445,12 @@
 pfs_open(struct vop_open_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	int mode = va->a_mode;
 
 	PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode));
-
-	/*
-	 * check if the file is visible to the caller
-	 *
-	 * XXX Not sure if this is necessary, as the VFS system calls
-	 * XXX pfs_lookup() and pfs_access() first, and pfs_lookup()
-	 * XXX calls pfs_visible().  There's a race condition here, but
-	 * XXX calling pfs_visible() from here doesn't really close it,
-	 * XXX and the only consequence of that race is an EIO further
-	 * XXX down the line.
-	 */
-	if (!pfs_visible(va->a_td, pn, pvd->pvd_pid, NULL))
-		PFS_RETURN (ENOENT);
+	pfs_assert_not_owned(pn);
 
 	/* check if the requested mode is permitted */
 	if (((mode & FREAD) && !(mode & PFS_RD)) ||
@@ -473,7 +471,7 @@
 pfs_read(struct vop_read_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc;
@@ -481,7 +479,8 @@
 	int error;
 	unsigned int buflen, offset, resid;
 
-	PFS_TRACE((pn->pn_name));
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG)
 		PFS_RETURN (EINVAL);
@@ -489,7 +488,7 @@
 	if (!(pn->pn_flags & PFS_RD))
 		PFS_RETURN (EBADF);
 
-	if (pn->pn_func == NULL)
+	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/*
@@ -498,33 +497,35 @@
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
-
 	if (proc != NULL) {
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 
 	if (pn->pn_flags & PFS_RAWRD) {
-		error = (pn->pn_func)(curthread, proc, pn, NULL, uio);
+		PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid));
+		error = pn_fill(curthread, proc, pn, NULL, uio);
+		PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid));
 		if (proc != NULL)
 			PRELE(proc);
 		PFS_RETURN (error);
 	}
 
-	/* Beaucoup sanity checks so we don't ask for bogus allocation. */
+	/* beaucoup sanity checks so we don't ask for bogus allocation */
 	if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
 	    (offset = uio->uio_offset) != uio->uio_offset ||
 	    (resid = uio->uio_resid) != uio->uio_resid ||
-	    (buflen = offset + resid) < offset || buflen > INT_MAX) {
+	    (buflen = offset + resid + 1) < offset || buflen > INT_MAX) {
 		if (proc != NULL)
 			PRELE(proc);
 		PFS_RETURN (EINVAL);
 	}
-	if (buflen > MAXPHYS) {
+	if (buflen > MAXPHYS + 1) {
 		if (proc != NULL)
 			PRELE(proc);
 		PFS_RETURN (EIO);
 	}
+
 	sb = sbuf_new(sb, NULL, buflen, 0);
 	if (sb == NULL) {
 		if (proc != NULL)
@@ -532,7 +533,7 @@
 		PFS_RETURN (EIO);
 	}
 
-	error = (pn->pn_func)(curthread, proc, pn, sb, uio);
+	error = pn_fill(curthread, proc, pn, sb, uio);
 
 	if (proc != NULL)
 		PRELE(proc);
@@ -552,10 +553,13 @@
  * Iterate through directory entries
  */
 static int
-pfs_iterate(struct thread *td, pid_t pid, struct pfs_node *pd,
+pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd,
 	    struct pfs_node **pn, struct proc **p)
 {
+	int visible;
+
 	sx_assert(&allproc_lock, SX_SLOCKED);
+	pfs_assert_owned(pd);
  again:
 	if (*pn == NULL) {
 		/* first node */
@@ -573,12 +577,22 @@
 		/* out of processes: next node */
 		if (*p == NULL)
 			*pn = (*pn)->pn_next;
+		else
+			PROC_LOCK(*p);
 	}
 
 	if ((*pn) == NULL)
 		return (-1);
 
-	if (!pfs_visible(td, *pn, *p ? (*p)->p_pid : pid, NULL))
+	if (*p != NULL) {
+		visible = pfs_visible_proc(td, *pn, *p);
+		PROC_UNLOCK(*p);
+	} else if (proc != NULL) {
+		visible = pfs_visible_proc(td, *pn, proc);
+	} else {
+		visible = 1;
+	}
+	if (!visible)
 		goto again;
 
 	return (0);
@@ -591,28 +605,26 @@
 pfs_readdir(struct vop_readdir_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_info *pi = (struct pfs_info *)vn->v_mount->mnt_data;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	pid_t pid = pvd->pvd_pid;
+	struct proc *p, *proc;
 	struct pfs_node *pn;
 	struct dirent *entry;
 	struct uio *uio;
-	struct proc *p;
 	off_t offset;
 	int error, i, resid;
 	char *buf, *ent;
 
-	PFS_TRACE((pd->pn_name));
+	KASSERT(pd->pn_info == vn->v_mount->mnt_data,
+	    ("%s(): pn_info does not match mountpoint", __func__));
+	PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid));
+	pfs_assert_not_owned(pd);
 
 	if (vn->v_type != VDIR)
 		PFS_RETURN (ENOTDIR);
 	uio = va->a_uio;
 
-	/* check if the directory is visible to the caller */
-	if (!pfs_visible(curthread, pd, pid, NULL))
-		PFS_RETURN (ENOENT);
-
 	/* only allow reading entire entries */
 	offset = uio->uio_offset;
 	resid = uio->uio_resid;
@@ -622,29 +634,41 @@
 	if (resid == 0)
 		PFS_RETURN (0);
 
-	/* skip unwanted entries */
+	/* can't do this while holding the proc lock... */
+	buf = malloc(resid, M_IOV, M_WAITOK | M_ZERO);
 	sx_slock(&allproc_lock);
-	for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN)
-		if (pfs_iterate(curthread, pid, pd, &pn, &p) == -1) {
+	pfs_lock(pd);
+
+        /* check if the directory is visible to the caller */
+        if (!pfs_visible(curthread, pd, pid, &proc)) {
+		sx_sunlock(&allproc_lock);
+		pfs_unlock(pd);
+		free(buf, M_IOV);
+                PFS_RETURN (ENOENT);
+	}
+	KASSERT(pid == NO_PID || proc != NULL,
+	    ("%s(): no process for pid %lu", __func__, (unsigned long)pid));
+
+	/* skip unwanted entries */
+	for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) {
+		if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) {
 			/* nothing left... */
+			if (proc != NULL)
+				PROC_UNLOCK(proc);
+			pfs_unlock(pd);
 			sx_sunlock(&allproc_lock);
+			free(buf, M_IOV);
 			PFS_RETURN (0);
 		}
+	}
 
 	/* fill in entries */
-	ent = buf = malloc(resid, M_IOV, M_WAITOK | M_ZERO);
-	while (pfs_iterate(curthread, pid, pd, &pn, &p) != -1 &&
+	ent = buf;
+	while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 &&
 	    resid >= PFS_DELEN) {
 		entry = (struct dirent *)ent;
 		entry->d_reclen = PFS_DELEN;
-		if (!pn->pn_parent)
-			pn->pn_parent = pd;
-		if (!pn->pn_fileno)
-			pfs_fileno_alloc(pi, pn);
-		if (pid != NO_PID)
-			entry->d_fileno = pn->pn_fileno * NO_PID + pid;
-		else
-			entry->d_fileno = pn->pn_fileno;
+		entry->d_fileno = pn_fileno(pn, pid);
 		/* PFS_DELEN was picked to fit PFS_NAMLEN */
 		for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i)
 			entry->d_name[i] = pn->pn_name[i];
@@ -654,7 +678,6 @@
 		case pfstype_procdir:
 			KASSERT(p != NULL,
 			    ("reached procdir node with p == NULL"));
-			entry->d_fileno = pn->pn_fileno * NO_PID + p->p_pid;
 			entry->d_namlen = snprintf(entry->d_name,
 			    PFS_NAMELEN, "%d", p->p_pid);
 			/* fall through */
@@ -673,12 +696,16 @@
 		default:
 			panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type);
 		}
-		PFS_TRACE((entry->d_name));
+		PFS_TRACE(("%s", entry->d_name));
 		offset += PFS_DELEN;
 		resid -= PFS_DELEN;
 		ent += PFS_DELEN;
 	}
+	if (proc != NULL)
+		PROC_UNLOCK(proc);
+	pfs_unlock(pd);
 	sx_sunlock(&allproc_lock);
+	PFS_TRACE(("%zd bytes", ent - buf));
 	error = uiomove(buf, ent - buf, uio);
 	free(buf, M_IOV);
 	PFS_RETURN (error);
@@ -691,20 +718,21 @@
 pfs_readlink(struct vop_readlink_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc = NULL;
-	char buf[MAXPATHLEN];
+	char buf[PATH_MAX];
 	struct sbuf sb;
 	int error;
 
-	PFS_TRACE((pn->pn_name));
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VLNK)
 		PFS_RETURN (EINVAL);
 
-	if (pn->pn_func == NULL)
+	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	if (pvd->pvd_pid != NO_PID) {
@@ -721,7 +749,7 @@
 	/* sbuf_new() can't fail with a static buffer */
 	sbuf_new(&sb, buf, sizeof buf, 0);
 
-	error = (pn->pn_func)(curthread, proc, pn, &sb, NULL);
+	error = pn_fill(curthread, proc, pn, &sb, NULL);
 
 	if (proc != NULL)
 		PRELE(proc);
@@ -743,7 +771,12 @@
 static int
 pfs_reclaim(struct vop_reclaim_args *va)
 {
-	PFS_TRACE((((struct pfs_vdata *)va->a_vp->v_data)->pvd_pn->pn_name));
+	struct vnode *vn = va->a_vp;
+	struct pfs_vdata *pvd = vn->v_data;
+	struct pfs_node *pn = pvd->pvd_pn;
+
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	return (pfs_vncache_free(va->a_vp));
 }
@@ -754,7 +787,12 @@
 static int
 pfs_setattr(struct vop_setattr_args *va)
 {
-	PFS_TRACE((((struct pfs_vdata *)va->a_vp->v_data)->pvd_pn->pn_name));
+	struct vnode *vn = va->a_vp;
+	struct pfs_vdata *pvd = vn->v_data;
+	struct pfs_node *pn = pvd->pvd_pn;
+
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	PFS_RETURN (EOPNOTSUPP);
 }
@@ -766,22 +804,25 @@
 pfs_write(struct vop_write_args *va)
 {
 	struct vnode *vn = va->a_vp;
-	struct pfs_vdata *pvd = (struct pfs_vdata *)vn->v_data;
+	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc;
 	struct sbuf sb;
 	int error;
 
-	PFS_TRACE((pn->pn_name));
+	PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG)
 		PFS_RETURN (EINVAL);
+	KASSERT(pn->pn_type != pfstype_file,
+	    ("%s(): VREG vnode refers to non-file pfs_node", __func__));
 
 	if (!(pn->pn_flags & PFS_WR))
 		PFS_RETURN (EBADF);
 
-	if (pn->pn_func == NULL)
+	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/*
@@ -790,29 +831,32 @@
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
-
 	if (proc != NULL) {
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 
 	if (pn->pn_flags & PFS_RAWWR) {
-		error = (pn->pn_func)(curthread, proc, pn, NULL, uio);
+		pfs_lock(pn);
+		error = pn_fill(curthread, proc, pn, NULL, uio);
+		pfs_unlock(pn);
 		if (proc != NULL)
 			PRELE(proc);
 		PFS_RETURN (error);
 	}
 
 	sbuf_uionew(&sb, uio, &error);
-	if (error)
+	if (error) {
+		if (proc != NULL)
+			PRELE(proc);
 		PFS_RETURN (error);
+	}
 
-	error = (pn->pn_func)(curthread, proc, pn, &sb, uio);
+	error = pn_fill(curthread, proc, pn, &sb, uio);
 
+	sbuf_delete(&sb);
 	if (proc != NULL)
 		PRELE(proc);
-
-	sbuf_delete(&sb);
 	PFS_RETURN (error);
 }
 
Index: pseudofs_fileno.c
===================================================================
RCS file: /home/cvs/src/sys/fs/pseudofs/pseudofs_fileno.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/pseudofs/pseudofs_fileno.c -L sys/fs/pseudofs/pseudofs_fileno.c -u -r1.1.1.1 -r1.2
--- sys/fs/pseudofs/pseudofs_fileno.c
+++ sys/fs/pseudofs/pseudofs_fileno.c
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs_fileno.c,v 1.12 2005/03/19 08:22:36 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs_fileno.c,v 1.15 2007/04/15 17:10:01 des Exp $");
 
 #include "opt_pseudofs.h"
 
@@ -38,7 +38,9 @@
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
+#include <sys/proc.h>
 #include <sys/sysctl.h>
+#include <sys/systm.h>
 
 #include <fs/pseudofs/pseudofs.h>
 #include <fs/pseudofs/pseudofs_internal.h>
@@ -49,13 +51,10 @@
 void
 pfs_fileno_init(struct pfs_info *pi)
 {
-	struct unrhdr *up;
 
-	up = new_unrhdr(3, INT_MAX, &pi->pi_mutex);
-	mtx_lock(&pi->pi_mutex);
-	pi->pi_unrhdr = up;
-	pi->pi_root->pn_fileno = 2;
-	mtx_unlock(&pi->pi_mutex);
+	mtx_assert(&Giant, MA_OWNED);
+	mtx_init(&pi->pi_mutex, "pfs_fileno", NULL, MTX_DEF);
+	pi->pi_unrhdr = new_unrhdr(3, INT_MAX / NO_PID, &pi->pi_mutex);
 }
 
 /*
@@ -64,60 +63,61 @@
 void
 pfs_fileno_uninit(struct pfs_info *pi)
 {
-	struct unrhdr *up;
-
-	mtx_lock(&pi->pi_mutex);
 
-	up = pi->pi_unrhdr;
+	mtx_assert(&Giant, MA_OWNED);
+	delete_unrhdr(pi->pi_unrhdr);
 	pi->pi_unrhdr = NULL;
-
-	mtx_unlock(&pi->pi_mutex);
-
-	delete_unrhdr(up);
+	mtx_destroy(&pi->pi_mutex);
 }
 
 /*
  * Allocate a file number
  */
 void
-pfs_fileno_alloc(struct pfs_info *pi, struct pfs_node *pn)
+pfs_fileno_alloc(struct pfs_node *pn)
 {
-	/* make sure our parent has a file number */
-	if (pn->pn_parent && !pn->pn_parent->pn_fileno)
-		pfs_fileno_alloc(pi, pn->pn_parent);
+
+	if (pn->pn_parent)
+		PFS_TRACE(("%s/%s", pn->pn_parent->pn_name, pn->pn_name));
+	else
+		PFS_TRACE(("%s", pn->pn_name));
+	pfs_assert_not_owned(pn);
 
 	switch (pn->pn_type) {
 	case pfstype_root:
+		/* root must always be 2 */
+		pn->pn_fileno = 2;
+		break;
 	case pfstype_dir:
 	case pfstype_file:
 	case pfstype_symlink:
 	case pfstype_procdir:
-		pn->pn_fileno = alloc_unr(pi->pi_unrhdr);
+		pn->pn_fileno = alloc_unr(pn->pn_info->pi_unrhdr);
 		break;
 	case pfstype_this:
 		KASSERT(pn->pn_parent != NULL,
-		    ("pfstype_this node has no parent"));
+		    ("%s(): pfstype_this node has no parent", __func__));
 		pn->pn_fileno = pn->pn_parent->pn_fileno;
 		break;
 	case pfstype_parent:
 		KASSERT(pn->pn_parent != NULL,
-		    ("pfstype_parent node has no parent"));
-		if (pn->pn_parent == pi->pi_root) {
+		    ("%s(): pfstype_parent node has no parent", __func__));
+		if (pn->pn_parent->pn_type == pfstype_root) {
 			pn->pn_fileno = pn->pn_parent->pn_fileno;
 			break;
 		}
 		KASSERT(pn->pn_parent->pn_parent != NULL,
-		    ("pfstype_parent node has no grandparent"));
+		    ("%s(): pfstype_parent node has no grandparent", __func__));
 		pn->pn_fileno = pn->pn_parent->pn_parent->pn_fileno;
 		break;
 	case pfstype_none:
 		KASSERT(0,
-		    ("pfs_fileno_alloc() called for pfstype_none node"));
+		    ("%s(): pfstype_none node", __func__));
 		break;
 	}
 
 #if 0
-	printf("pfs_fileno_alloc(): %s: ", pi->pi_name);
+	printf("%s(): %s: ", __func__, pn->pn_info->pi_name);
 	if (pn->pn_parent) {
 		if (pn->pn_parent->pn_parent) {
 			printf("%s/", pn->pn_parent->pn_parent->pn_name);
@@ -132,15 +132,20 @@
  * Release a file number
  */
 void
-pfs_fileno_free(struct pfs_info *pi, struct pfs_node *pn)
+pfs_fileno_free(struct pfs_node *pn)
 {
+
+	pfs_assert_not_owned(pn);
+
 	switch (pn->pn_type) {
 	case pfstype_root:
+		/* not allocated from unrhdr */
+		return;
 	case pfstype_dir:
 	case pfstype_file:
 	case pfstype_symlink:
 	case pfstype_procdir:
-		free_unr(pi->pi_unrhdr, pn->pn_fileno);
+		free_unr(pn->pn_info->pi_unrhdr, pn->pn_fileno);
 		break;
 	case pfstype_this:
 	case pfstype_parent:
Index: pseudofs.c
===================================================================
RCS file: /home/cvs/src/sys/fs/pseudofs/pseudofs.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/pseudofs/pseudofs.c -L sys/fs/pseudofs/pseudofs.c -u -r1.1.1.1 -r1.2
--- sys/fs/pseudofs/pseudofs.c
+++ sys/fs/pseudofs/pseudofs.c
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs.c,v 1.26 2005/03/24 07:36:15 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/fs/pseudofs/pseudofs.c,v 1.32 2007/04/15 17:10:01 des Exp $");
 
 #include "opt_pseudofs.h"
 
@@ -52,18 +52,48 @@
 SYSCTL_NODE(_vfs, OID_AUTO, pfs, CTLFLAG_RW, 0,
     "pseudofs");
 
+int pfs_trace;
+SYSCTL_INT(_vfs_pfs, OID_AUTO, trace, CTLFLAG_RW, &pfs_trace, 0,
+    "enable tracing of pseudofs vnode operations");
+
 #if PFS_FSNAMELEN != MFSNAMELEN
 #error "PFS_FSNAMELEN is not equal to MFSNAMELEN"
 #endif
 
 /*
+ * Allocate and initialize a node
+ */
+static struct pfs_node *
+pfs_alloc_node(struct pfs_info *pi, const char *name, pfs_type_t type)
+{
+	struct pfs_node *pn;
+
+	KASSERT(strlen(name) < PFS_NAMELEN,
+	    ("%s(): node name is too long", __func__));
+
+	MALLOC(pn, struct pfs_node *, sizeof *pn,
+	    M_PFSNODES, M_WAITOK|M_ZERO);
+	mtx_init(&pn->pn_mutex, "pfs_node", NULL, MTX_DEF | MTX_DUPOK);
+	strlcpy(pn->pn_name, name, sizeof pn->pn_name);
+	pn->pn_type = type;
+	pn->pn_info = pi;
+	return (pn);
+}
+
+/*
  * Add a node to a directory
  */
-static int
-_pfs_add_node(struct pfs_node *parent, struct pfs_node *node)
+static void
+pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
 {
+#ifdef INVARIANTS
+	struct pfs_node *iter;
+#endif
+
 	KASSERT(parent != NULL,
 	    ("%s(): parent is NULL", __func__));
+	KASSERT(pn->pn_parent == NULL,
+	    ("%s(): node already has a parent", __func__));
 	KASSERT(parent->pn_info != NULL,
 	    ("%s(): parent has no pn_info", __func__));
 	KASSERT(parent->pn_type == pfstype_dir ||
@@ -71,50 +101,70 @@
 	    parent->pn_type == pfstype_root,
 	    ("%s(): parent is not a directory", __func__));
 
-	/* XXX should check for duplicate names etc. */
+#ifdef INVARIANTS
+	/* XXX no locking! */
+	if (pn->pn_type == pfstype_procdir)
+		for (iter = parent; iter != NULL; iter = iter->pn_parent)
+			KASSERT(iter->pn_type != pfstype_procdir,
+			    ("%s(): nested process directories", __func__));
+	for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
+		KASSERT(strcmp(pn->pn_name, iter->pn_name) != 0,
+		    ("%s(): homonymous siblings", __func__));
+		if (pn->pn_type == pfstype_procdir)
+			KASSERT(iter->pn_type != pfstype_procdir,
+			    ("%s(): sibling process directories", __func__));
+	}
+#endif
 
-	mtx_lock(&parent->pn_info->pi_mutex);
-	node->pn_info = parent->pn_info;
-	node->pn_parent = parent;
-	node->pn_next = parent->pn_nodes;
-	parent->pn_nodes = node;
-	/* Propagate flag to all child nodes (and thus their vnodes) */
-	if ((parent->pn_flags & PFS_PROCDEP) != 0)
-		node->pn_flags |= PFS_PROCDEP;
-	mtx_unlock(&parent->pn_info->pi_mutex);
+	pn->pn_parent = parent;
+	pfs_fileno_alloc(pn);
 
-	return (0);
+	pfs_lock(parent);
+	pn->pn_next = parent->pn_nodes;
+	if ((parent->pn_flags & PFS_PROCDEP) != 0)
+		pn->pn_flags |= PFS_PROCDEP;
+	parent->pn_nodes = pn;
+	pfs_unlock(parent);
 }
 
 /*
- * Add . and .. to a directory
+ * Detach a node from its parent
  */
-static int
-_pfs_fixup_dir(struct pfs_node *parent)
+static void
+pfs_detach_node(struct pfs_node *pn)
 {
-	struct pfs_node *dir;
+	struct pfs_node *parent = pn->pn_parent;
+	struct pfs_node **iter;
 
-	MALLOC(dir, struct pfs_node *, sizeof *dir,
-	    M_PFSNODES, M_WAITOK|M_ZERO);
-	dir->pn_name[0] = '.';
-	dir->pn_type = pfstype_this;
+	KASSERT(parent != NULL, ("%s(): node has no parent", __func__));
+	KASSERT(parent->pn_info == pn->pn_info,
+	    ("%s(): parent has different pn_info", __func__));
 
-	if (_pfs_add_node(parent, dir) != 0) {
-		FREE(dir, M_PFSNODES);
-		return (-1);
+	pfs_lock(parent);
+	iter = &parent->pn_nodes;
+	while (*iter != NULL) {
+		if (*iter == pn) {
+			*iter = pn->pn_next;
+			break;
+		}
+		iter = &(*iter)->pn_next;
 	}
+	pn->pn_parent = NULL;
+	pfs_unlock(parent);
+}
 
-	MALLOC(dir, struct pfs_node *, sizeof *dir,
-	    M_PFSNODES, M_WAITOK|M_ZERO);
-	dir->pn_name[0] = dir->pn_name[1] = '.';
-	dir->pn_type = pfstype_parent;
-
-	if (_pfs_add_node(parent, dir) != 0) {
-		FREE(dir, M_PFSNODES);
-		return (-1);
-	}
+/*
+ * Add . and .. to a directory
+ */
+static void
+pfs_fixup_dir(struct pfs_node *parent)
+{
+	struct pfs_node *pn;
 
-	return (0);
+	pn = pfs_alloc_node(parent->pn_info, ".", pfstype_this);
+	pfs_add_node(parent, pn);
+	pn = pfs_alloc_node(parent->pn_info, "..", pfstype_parent);
+	pfs_add_node(parent, pn);
 }
 
 /*
@@ -122,32 +172,21 @@
  */
 struct pfs_node	*
 pfs_create_dir(struct pfs_node *parent, const char *name,
-	       pfs_attr_t attr, pfs_vis_t vis, int flags)
+	       pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
+	       int flags)
 {
-	struct pfs_node *dir;
-
-	KASSERT(strlen(name) < PFS_NAMELEN,
-	    ("%s(): node name is too long", __func__));
+	struct pfs_node *pn;
 
-	MALLOC(dir, struct pfs_node *, sizeof *dir,
-	    M_PFSNODES, M_WAITOK|M_ZERO);
-	strcpy(dir->pn_name, name);
-	dir->pn_type = (flags & PFS_PROCDEP) ? pfstype_procdir : pfstype_dir;
-	dir->pn_attr = attr;
-	dir->pn_vis = vis;
-	dir->pn_flags = flags;
-
-	if (_pfs_add_node(parent, dir) != 0) {
-		FREE(dir, M_PFSNODES);
-		return (NULL);
-	}
-
-	if (_pfs_fixup_dir(dir) != 0) {
-		pfs_destroy(dir);
-		return (NULL);
-	}
+	pn = pfs_alloc_node(parent->pn_info, name,
+	    (flags & PFS_PROCDEP) ? pfstype_procdir : pfstype_dir);
+	pn->pn_attr = attr;
+	pn->pn_vis = vis;
+	pn->pn_destroy = destroy;
+	pn->pn_flags = flags;
+	pfs_add_node(parent, pn);
+	pfs_fixup_dir(pn);
 
-	return (dir);
+	return (pn);
 }
 
 /*
@@ -155,28 +194,20 @@
  */
 struct pfs_node	*
 pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill,
-		pfs_attr_t attr, pfs_vis_t vis, int flags)
+		pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
+		int flags)
 {
-	struct pfs_node *node;
+	struct pfs_node *pn;
 
-	KASSERT(strlen(name) < PFS_NAMELEN,
-	    ("%s(): node name is too long", __func__));
+	pn = pfs_alloc_node(parent->pn_info, name, pfstype_file);
+	pn->pn_fill = fill;
+	pn->pn_attr = attr;
+	pn->pn_vis = vis;
+	pn->pn_destroy = destroy;
+	pn->pn_flags = flags;
+	pfs_add_node(parent, pn);
 
-	MALLOC(node, struct pfs_node *, sizeof *node,
-	    M_PFSNODES, M_WAITOK|M_ZERO);
-	strcpy(node->pn_name, name);
-	node->pn_type = pfstype_file;
-	node->pn_func = fill;
-	node->pn_attr = attr;
-	node->pn_vis = vis;
-	node->pn_flags = flags;
-
-	if (_pfs_add_node(parent, node) != 0) {
-		FREE(node, M_PFSNODES);
-		return (NULL);
-	}
-
-	return (node);
+	return (pn);
 }
 
 /*
@@ -184,15 +215,20 @@
  */
 struct pfs_node	*
 pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill,
-		pfs_attr_t attr, pfs_vis_t vis, int flags)
+		pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
+		int flags)
 {
-	struct pfs_node *node;
+	struct pfs_node *pn;
+
+	pn = pfs_alloc_node(parent->pn_info, name, pfstype_symlink);
+	pn->pn_fill = fill;
+	pn->pn_attr = attr;
+	pn->pn_vis = vis;
+	pn->pn_destroy = destroy;
+	pn->pn_flags = flags;
+	pfs_add_node(parent, pn);
 
-	node = pfs_create_file(parent, name, fill, attr, vis, flags);
-	if (node == NULL)
-		return (NULL);
-	node->pn_type = pfstype_symlink;
-	return (node);
+	return (pn);
 }
 
 /*
@@ -201,57 +237,60 @@
 struct pfs_node *
 pfs_find_node(struct pfs_node *parent, const char *name)
 {
-	struct pfs_node *node;
+	struct pfs_node *pn;
 
-	for (node = parent->pn_nodes; node != NULL; node = node->pn_next)
-		if (strcmp(node->pn_name, name) == 0)
-			return (node);
-	return (NULL);
+	pfs_lock(parent);
+	for (pn = parent->pn_nodes; pn != NULL; pn = pn->pn_next)
+		if (strcmp(pn->pn_name, name) == 0)
+			break;
+	pfs_unlock(parent);
+	return (pn);
 }
 
 /*
- * Destroy a node or a tree of nodes
+ * Destroy a node and all its descendants.  If the node to be destroyed
+ * has a parent, the parent's mutex must be held.
  */
 int
-pfs_destroy(struct pfs_node *node)
+pfs_destroy(struct pfs_node *pn)
 {
-	struct pfs_node *parent, *rover;
+	struct pfs_node *iter;
 
-	KASSERT(node != NULL,
+	KASSERT(pn != NULL,
 	    ("%s(): node is NULL", __func__));
-	KASSERT(node->pn_info != NULL,
+	KASSERT(pn->pn_info != NULL,
 	    ("%s(): node has no pn_info", __func__));
 
+	if (pn->pn_parent)
+		pfs_detach_node(pn);
+
 	/* destroy children */
-	if (node->pn_type == pfstype_dir ||
-	    node->pn_type == pfstype_procdir ||
-	    node->pn_type == pfstype_root)
-		while (node->pn_nodes != NULL)
-			pfs_destroy(node->pn_nodes);
-
-	/* unlink from parent */
-	if ((parent = node->pn_parent) != NULL) {
-		KASSERT(parent->pn_info == node->pn_info,
-		    ("%s(): parent has different pn_info", __func__));
-		mtx_lock(&node->pn_info->pi_mutex);
-		if (parent->pn_nodes == node) {
-			parent->pn_nodes = node->pn_next;
-		} else {
-			rover = parent->pn_nodes;
-			while (rover->pn_next != NULL) {
-				if (rover->pn_next == node) {
-					rover->pn_next = node->pn_next;
-					break;
-				}
-				rover = rover->pn_next;
-			}
+	if (pn->pn_type == pfstype_dir ||
+	    pn->pn_type == pfstype_procdir ||
+	    pn->pn_type == pfstype_root) {
+		pfs_lock(pn);
+		while (pn->pn_nodes != NULL) {
+			iter = pn->pn_nodes;
+			pn->pn_nodes = iter->pn_next;
+			iter->pn_parent = NULL;
+			pfs_unlock(pn);
+			pfs_destroy(iter);
+			pfs_lock(pn);
 		}
-		mtx_unlock(&node->pn_info->pi_mutex);
+		pfs_unlock(pn);
 	}
 
-	/* revoke vnodes and release memory */
-	pfs_disable(node);
-	FREE(node, M_PFSNODES);
+	/* revoke vnodes and fileno */
+	pfs_purge(pn);
+
+	/* callback to free any private resources */
+	if (pn->pn_destroy != NULL)
+		pn_destroy(pn);
+
+	/* destroy the node */
+	pfs_fileno_free(pn);
+	mtx_destroy(&pn->pn_mutex);
+	FREE(pn, M_PFSNODES);
 
 	return (0);
 }
@@ -267,7 +306,10 @@
 	if (mp->mnt_flag & MNT_UPDATE)
 		return (EOPNOTSUPP);
 
+	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_MPSAFE;
+	MNT_IUNLOCK(mp);
 	mp->mnt_data = (qaddr_t)pi;
 	vfs_getnewfsid(mp);
 
@@ -285,17 +327,24 @@
 }
 
 /*
- * Unmount a pseudofs instance
+ * Compatibility shim for old mount(2) system call
  */
 int
-pfs_unmount(struct mount *mp, int mntflags, struct thread *td)
+pfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
 {
-	struct pfs_info *pi;
 	int error;
 
-	pi = (struct pfs_info *)mp->mnt_data;
+	error = kernel_mount(ma, flags);
+	return (error);
+}
 
-	/* XXX do stuff with pi... */
+/*
+ * Unmount a pseudofs instance
+ */
+int
+pfs_unmount(struct mount *mp, int mntflags, struct thread *td)
+{
+	int error;
 
 	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0, td);
 	return (error);
@@ -310,7 +359,7 @@
 	struct pfs_info *pi;
 
 	pi = (struct pfs_info *)mp->mnt_data;
-	return pfs_vncache_alloc(mp, vpp, pi->pi_root, NO_PID);
+	return (pfs_vncache_alloc(mp, vpp, pi->pi_root, NO_PID));
 }
 
 /*
@@ -332,30 +381,24 @@
 	struct pfs_node *root;
 	int error;
 
-	mtx_init(&pi->pi_mutex, "pseudofs", NULL, MTX_DEF);
+	mtx_assert(&Giant, MA_OWNED);
+
+	pfs_fileno_init(pi);
 
 	/* set up the root diretory */
-	MALLOC(root, struct pfs_node *, sizeof *root,
-	    M_PFSNODES, M_WAITOK|M_ZERO);
-	root->pn_type = pfstype_root;
-	root->pn_name[0] = '/';
-	root->pn_info = pi;
-	if (_pfs_fixup_dir(root) != 0) {
-		FREE(root, M_PFSNODES);
-		return (ENODEV); /* XXX not really the right errno */
-	}
+	root = pfs_alloc_node(pi, "/", pfstype_root);
 	pi->pi_root = root;
+	pfs_fileno_alloc(root);
+	pfs_fixup_dir(root);
 
 	/* construct file hierarchy */
 	error = (pi->pi_init)(pi, vfc);
 	if (error) {
 		pfs_destroy(root);
 		pi->pi_root = NULL;
-		mtx_destroy(&pi->pi_mutex);
 		return (error);
 	}
 
-	pfs_fileno_init(pi);
 	if (bootverbose)
 		printf("%s registered\n", pi->pi_name);
 	return (0);
@@ -369,10 +412,11 @@
 {
 	int error;
 
-	pfs_fileno_uninit(pi);
+	mtx_assert(&Giant, MA_OWNED);
+
 	pfs_destroy(pi->pi_root);
 	pi->pi_root = NULL;
-	mtx_destroy(&pi->pi_mutex);
+	pfs_fileno_uninit(pi);
 	if (bootverbose)
 		printf("%s unregistered\n", pi->pi_name);
 	error = (pi->pi_uninit)(pi, vfc);
Index: smbfs_smb.c
===================================================================
RCS file: /home/cvs/src/sys/fs/smbfs/smbfs_smb.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/smbfs/smbfs_smb.c -L sys/fs/smbfs/smbfs_smb.c -u -r1.2 -r1.3
--- sys/fs/smbfs/smbfs_smb.c
+++ sys/fs/smbfs/smbfs_smb.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/smbfs/smbfs_smb.c,v 1.15 2005/05/04 15:05:46 takawata Exp $
+ * $FreeBSD: src/sys/fs/smbfs/smbfs_smb.c,v 1.18 2007/02/27 17:23:28 jhb Exp $
  */
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -512,9 +512,10 @@
 		mb_put_uint8(mbp, 0);
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
-		SMBERROR("%d\n", error);
-		if (error)
+		if (error) {
+			SMBERROR("smb_rq_simple(rqp) => error %d\n", error);
 			break;
+		}
 	} while(0);
 	smb_rq_done(rqp);
 	return error;
@@ -1198,7 +1199,7 @@
 			 * I've didn't notice any problem, but put code
 			 * for it.
 			 */
-			 tsleep(&flags, PVFS, "fix95", tvtohz(&tv));
+			 pause("fix95", tvtohz(&tv));
 		}
 #endif
 	}
Index: smbfs_node.c
===================================================================
RCS file: /home/cvs/src/sys/fs/smbfs/smbfs_node.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/smbfs/smbfs_node.c -L sys/fs/smbfs/smbfs_node.c -u -r1.2 -r1.3
--- sys/fs/smbfs/smbfs_node.c
+++ sys/fs/smbfs/smbfs_node.c
@@ -29,17 +29,17 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/smbfs/smbfs_node.c,v 1.28.2.1 2006/03/12 21:50:01 scottl Exp $
+ * $FreeBSD: src/sys/fs/smbfs/smbfs_node.c,v 1.34 2007/05/29 11:28:28 rwatson Exp $
  */
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
+#include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/vnode.h>
@@ -64,8 +64,8 @@
 
 extern struct vop_vector smbfs_vnodeops;	/* XXX -> .h file */
 
-MALLOC_DEFINE(M_SMBNODE, "SMBFS node", "SMBFS vnode private part");
-static MALLOC_DEFINE(M_SMBNODENAME, "SMBFS nname", "SMBFS node name");
+MALLOC_DEFINE(M_SMBNODE, "smbufs_node", "SMBFS vnode private part");
+static MALLOC_DEFINE(M_SMBNODENAME, "smbufs_nname", "SMBFS node name");
 
 int smbfs_hashprint(struct mount *mp);
 
@@ -139,22 +139,16 @@
 
 	cp = name;
 	cp--;
-	if (*cp != 0xfc) {
-		printf("First byte of name entry '%s' corrupted\n", name);
-		kdb_enter("ditto");
-	}
+	if (*cp != 0xfc)
+		panic("First byte of name entry '%s' corrupted", name);
 	cp -= sizeof(int);
 	nmlen = *(int*)cp;
 	slen = strlen(name) + 1;
-	if (nmlen != slen) {
-		printf("Name length mismatch: was %d, now %d name '%s'\n",
+	if (nmlen != slen)
+		panic("Name length mismatch: was %d, now %d name '%s'",
 		    nmlen, slen, name);
-		kdb_enter("ditto");
-	}
-	if (name[nmlen] != 0xfe) {
-		printf("Last byte of name entry '%s' corrupted\n", name);
-		kdb_enter("ditto");
-	}
+	if (name[nmlen] != 0xfe)
+		panic("Last byte of name entry '%s' corrupted\n", name);
 	free(cp, M_SMBNODENAME);
 #else
 	free(name, M_SMBNODENAME);
@@ -241,6 +235,11 @@
 		FREE(np, M_SMBNODE);
 		return error;
 	}
+	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
+	if (error != 0) {
+		FREE(np, M_SMBNODE);
+		return (error);
+	}
 	vp->v_type = fap->fa_attr & SMB_FA_DIR ? VDIR : VREG;
 	bzero(np, sizeof(*np));
 	vp->v_data = np;
@@ -421,6 +420,8 @@
 	va->va_type = vp->v_type;		/* vnode type (for create) */
 	if (vp->v_type == VREG) {
 		va->va_mode = smp->sm_file_mode; /* files access mode and type */
+		if (np->n_dosattr & SMB_FA_RDONLY)
+			va->va_mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH);
 	} else if (vp->v_type == VDIR) {
 		va->va_mode = smp->sm_dir_mode;	/* files access mode and type */
 	} else
Index: smbfs_io.c
===================================================================
RCS file: /home/cvs/src/sys/fs/smbfs/smbfs_io.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/smbfs/smbfs_io.c -L sys/fs/smbfs/smbfs_io.c -u -r1.1.1.1 -r1.2
--- sys/fs/smbfs/smbfs_io.c
+++ sys/fs/smbfs/smbfs_io.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/smbfs/smbfs_io.c,v 1.33 2005/03/13 12:18:24 jeff Exp $
+ * $FreeBSD: src/sys/fs/smbfs/smbfs_io.c,v 1.41 2007/06/04 21:45:16 attilio Exp $
  *
  */
 #include <sys/param.h>
@@ -264,7 +264,7 @@
 				return error;
 		}
 		if (ioflag & IO_APPEND) {
-#if notyet
+#ifdef notyet
 			/*
 			 * File size can be changed by another client
 			 */
@@ -475,8 +475,8 @@
 
 	kva = (vm_offset_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
-	cnt.v_vnodein++;
-	cnt.v_vnodepgsin += npages;
+	PCPU_INC(cnt.v_vnodein);
+	PCPU_ADD(cnt.v_vnodepgsin, npages);
 
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
@@ -551,7 +551,7 @@
 			 * now tell them that it is ok to use.
 			 */
 			if (!error) {
-				if (m->flags & PG_WANTED)
+				if (m->oflags & VPO_WANTED)
 					vm_page_activate(m);
 				else
 					vm_page_deactivate(m);
@@ -592,7 +592,7 @@
 #ifdef SMBFS_RWGENERIC
 	td = curthread;			/* XXX */
 	cred = td->td_ucred;		/* XXX */
-	VOP_OPEN(vp, FWRITE, cred, td, -1);
+	VOP_OPEN(vp, FWRITE, cred, td, NULL);
 	error = vop_stdputpages(ap);
 	VOP_CLOSE(vp, FWRITE, cred, td);
 	return error;
@@ -610,7 +610,7 @@
 
 	td = curthread;			/* XXX */
 	cred = td->td_ucred;		/* XXX */
-/*	VOP_OPEN(vp, FWRITE, cred, td, -1);*/
+/*	VOP_OPEN(vp, FWRITE, cred, td, NULL);*/
 	np = VTOSMB(vp);
 	smp = VFSTOSMBFS(vp->v_mount);
 	pages = ap->a_m;
@@ -626,8 +626,8 @@
 
 	kva = (vm_offset_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
-	cnt.v_vnodeout++;
-	cnt.v_vnodepgsout += count;
+	PCPU_INC(cnt.v_vnodeout);
+	PCPU_ADD(cnt.v_vnodepgsout, count);
 
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
@@ -683,6 +683,13 @@
 			return EINTR;
 	}
 	np->n_flag |= NFLUSHINPROG;
+
+	if (vp->v_bufobj.bo_object != NULL) {
+		VM_OBJECT_LOCK(vp->v_bufobj.bo_object);
+		vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
+		VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object);
+	}
+
 	error = vinvalbuf(vp, V_SAVE, td, PCATCH, 0);
 	while (error) {
 		if (error == ERESTART || error == EINTR) {
Index: smbfs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/smbfs/smbfs_vnops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/smbfs/smbfs_vnops.c -L sys/fs/smbfs/smbfs_vnops.c -u -r1.2 -r1.3
--- sys/fs/smbfs/smbfs_vnops.c
+++ sys/fs/smbfs/smbfs_vnops.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/smbfs/smbfs_vnops.c,v 1.61.2.1 2006/02/20 00:53:13 yar Exp $
+ * $FreeBSD: src/sys/fs/smbfs/smbfs_vnops.c,v 1.65 2007/05/31 11:51:50 kib Exp $
  */
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -44,6 +44,7 @@
 #include <sys/vnode.h>
 #include <sys/limits.h>
 #include <sys/lockf.h>
+#include <sys/stat.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
@@ -214,7 +215,7 @@
 	}
 	if (error == 0) {
 		np->n_flag |= NOPEN;
-		vnode_create_vobject_off(ap->a_vp, vattr.va_size, ap->a_td);
+		vnode_create_vobject(ap->a_vp, vattr.va_size, ap->a_td);
 	}
 	smbfs_attr_cacheremove(vp);
 	return error;
@@ -301,6 +302,7 @@
 	struct smb_vc *vcp = SSTOVC(ssp);
 	u_quad_t tsize = 0;
 	int isreadonly, doclose, error = 0;
+	int old_n_dosattr;
 
 	SMBVDEBUG("\n");
 	if (vap->va_flags != VNOVAL)
@@ -346,17 +348,31 @@
 			return error;
 		}
   	}
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		old_n_dosattr = np->n_dosattr;
+		if (vap->va_mode & S_IWUSR)
+			np->n_dosattr &= ~SMB_FA_RDONLY;
+		else
+			np->n_dosattr |= SMB_FA_RDONLY;
+		if (np->n_dosattr != old_n_dosattr) {
+			error = smbfs_smb_setpattr(np, np->n_dosattr, NULL, &scred);
+			if (error)
+				return error;
+		}
+	}
 	mtime = atime = NULL;
 	if (vap->va_mtime.tv_sec != VNOVAL)
 		mtime = &vap->va_mtime;
 	if (vap->va_atime.tv_sec != VNOVAL)
 		atime = &vap->va_atime;
 	if (mtime != atime) {
-		if (ap->a_cred->cr_uid != VTOSMBFS(vp)->sm_uid &&
-		    (error = suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) &&
-		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
-		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
-			return (error);
+		if (vap->va_vaflags & VA_UTIMES_NULL) {
+			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td);
+			if (error)
+				error = VOP_ACCESS(vp, VWRITE, ap->a_cred,
+				    ap->a_td);
+		} else
+			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td);
 #if 0
 		if (mtime == NULL)
 			mtime = &np->n_mtime;
@@ -369,7 +385,7 @@
 		 */
 		if ((np->n_flag & NOPEN) == 0) {
 			if (vcp->vc_flags & SMBV_WIN95) {
-				error = VOP_OPEN(vp, FWRITE, ap->a_cred, ap->a_td, -1);
+				error = VOP_OPEN(vp, FWRITE, ap->a_cred, ap->a_td, NULL);
 				if (!error) {
 /*				error = smbfs_smb_setfattrNT(np, 0, mtime, atime, &scred);
 				VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);*/
Index: smbfs_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/smbfs/smbfs_vfsops.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/fs/smbfs/smbfs_vfsops.c -L sys/fs/smbfs/smbfs_vfsops.c -u -r1.1.1.2 -r1.2
--- sys/fs/smbfs/smbfs_vfsops.c
+++ sys/fs/smbfs/smbfs_vfsops.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/smbfs/smbfs_vfsops.c,v 1.34.2.1 2006/02/10 00:37:57 yar Exp $
+ * $FreeBSD: src/sys/fs/smbfs/smbfs_vfsops.c,v 1.41 2006/09/26 04:12:46 tegge Exp $
  */
 
 #include <sys/param.h>
@@ -70,7 +70,7 @@
 SYSCTL_INT(_vfs_smbfs, OID_AUTO, version, CTLFLAG_RD, &smbfs_version, 0, "");
 SYSCTL_INT(_vfs_smbfs, OID_AUTO, debuglevel, CTLFLAG_RW, &smbfs_debuglevel, 0, "");
 
-static MALLOC_DEFINE(M_SMBFSHASH, "SMBFS hash", "SMBFS hash table");
+static MALLOC_DEFINE(M_SMBFSHASH, "smbfs_hash", "SMBFS hash table");
 
 static vfs_init_t       smbfs_init;
 static vfs_uninit_t     smbfs_uninit;
@@ -108,7 +108,7 @@
 	struct smbfs_args args;
 	int error;
 
-	error = copyin(data, (caddr_t)&args, sizeof(struct smbfs_args));
+	error = copyin(data, &args, sizeof(struct smbfs_args));
 	if (error)
 		return error;
 
@@ -138,7 +138,7 @@
 static const char *smbfs_opts[] = {
 	"dev", "soft", "intr", "strong", "have_nls", "long",
 	"mountpoint", "rootpath", "uid", "gid", "file_mode", "dir_mode",
-	"caseopt", NULL
+	"caseopt", "errmsg", NULL
 };
 
 static int
@@ -155,15 +155,20 @@
 	if (mp->mnt_flag & (MNT_UPDATE | MNT_ROOTFS))
 		return EOPNOTSUPP;
 
-	if (vfs_filteropt(mp->mnt_optnew, smbfs_opts))
+	if (vfs_filteropt(mp->mnt_optnew, smbfs_opts)) {
+		vfs_mount_error(mp, "%s", "Invalid option");
 		return (EINVAL);
+	}
 
 	smb_makescred(&scred, td, td->td_ucred);
-	if (1 != vfs_scanopt(mp->mnt_optnew, "dev", "%d", &v))
+	if (1 != vfs_scanopt(mp->mnt_optnew, "dev", "%d", &v)) {
+		vfs_mount_error(mp, "No dev option");
 		return (EINVAL);
+	}
 	error = smb_dev2share(v, SMBM_EXEC, &scred, &ssp);
 	if (error) {
 		printf("invalid device handle %d (%d)\n", v, error);
+		vfs_mount_error(mp, "invalid device handle %d (%d)\n", v, error);
 		return error;
 	}
 	vcp = SSTOVC(ssp);
@@ -177,8 +182,9 @@
 	    M_WAITOK|M_USE_RESERVE);
 #endif
         if (smp == NULL) {
-                printf("could not alloc smbmount\n");
-                error = ENOMEM;
+		printf("could not alloc smbmount\n");
+		vfs_mount_error(mp, "could not alloc smbmount", v, error);
+		error = ENOMEM;
 		goto bad;
         }
 	bzero(smp, sizeof(*smp));
@@ -191,37 +197,40 @@
 	smp->sm_root = NULL;
 	if (1 != vfs_scanopt(mp->mnt_optnew,
 	    "caseopt", "%d", &smp->sm_caseopt)) {
+		vfs_mount_error(mp, "Invalid caseopt");
 		error = EINVAL;
 		goto bad;
 	}
 	if (1 != vfs_scanopt(mp->mnt_optnew, "uid", "%d", &v)) {
+		vfs_mount_error(mp, "Invalid uid");
 		error = EINVAL;
 		goto bad;
 	}
 	smp->sm_uid = v;
 
 	if (1 != vfs_scanopt(mp->mnt_optnew, "gid", "%d", &v)) {
+		vfs_mount_error(mp, "Invalid gid");
 		error = EINVAL;
 		goto bad;
 	}
 	smp->sm_gid = v;
 
 	if (1 != vfs_scanopt(mp->mnt_optnew, "file_mode", "%d", &v)) {
+		vfs_mount_error(mp, "Invalid file_mode");
 		error = EINVAL;
 		goto bad;
 	}
 	smp->sm_file_mode = (v & (S_IRWXU|S_IRWXG|S_IRWXO)) | S_IFREG;
 
 	if (1 != vfs_scanopt(mp->mnt_optnew, "dir_mode", "%d", &v)) {
+		vfs_mount_error(mp, "Invalid dir_mode");
 		error = EINVAL;
 		goto bad;
 	}
 	smp->sm_dir_mode  = (v & (S_IRWXU|S_IRWXG|S_IRWXO)) | S_IFDIR;
 
 	vfs_flagopt(mp->mnt_optnew,
-	    "long", &smp->sm_flags, SMBFS_MOUNT_NO_LONG);
-
-	smp->sm_flags ^= SMBFS_MOUNT_NO_LONG;
+	    "nolong", &smp->sm_flags, SMBFS_MOUNT_NO_LONG);
 
 /*	simple_lock_init(&smp->sm_npslock);*/
 	pc = mp->mnt_stat.f_mntfromname;
@@ -240,8 +249,10 @@
 	}
 	vfs_getnewfsid(mp);
 	error = smbfs_root(mp, LK_EXCLUSIVE, &vp, td);
-	if (error)
+	if (error) {
+		vfs_mount_error(mp, "smbfs_root error: %d", error);
 		goto bad;
+	}
 	VOP_UNLOCK(vp, 0, td);
 	SMBVDEBUG("root.v_usecount = %d\n", vrefcnt(vp));
 
@@ -293,6 +304,9 @@
 	if (error)
 		return error;
 	smb_makescred(&scred, td, td->td_ucred);
+	error = smb_share_lock(smp->sm_share, LK_EXCLUSIVE, td);
+	if (error)
+		return error;
 	smb_share_put(smp->sm_share, &scred);
 	mp->mnt_data = (qaddr_t)0;
 
@@ -304,7 +318,9 @@
 #else
 	free(smp, M_SMBFSDATA);
 #endif
+	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 	return error;
 }
 
@@ -324,6 +340,7 @@
 
 	if (smp == NULL) {
 		SMBERROR("smp == NULL (bug in umount)\n");
+		vfs_mount_error(mp, "smp == NULL (bug in umount)");
 		return EINVAL;
 	}
 	if (smp->sm_root) {
@@ -354,7 +371,7 @@
 	struct mount *mp;
 	int cmd;
 	uid_t uid;
-	caddr_t arg;
+	void *arg;
 	struct thread *td;
 {
 	SMBVDEBUG("return EOPNOTSUPP\n");
@@ -394,8 +411,10 @@
 	struct smb_cred scred;
 	int error = 0;
 
-	if (np == NULL)
+	if (np == NULL) {
+		vfs_mount_error(mp, "np == NULL");
 		return EINVAL;
+	}
 	
 	sbp->f_iosize = SSTOVC(ssp)->vc_txmax;		/* optimal transfer block size */
 	smb_makescred(&scred, td, td->td_ucred);
Index: smbfs_subr.c
===================================================================
RCS file: /home/cvs/src/sys/fs/smbfs/smbfs_subr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/smbfs/smbfs_subr.c -L sys/fs/smbfs/smbfs_subr.c -u -r1.1.1.1 -r1.2
--- sys/fs/smbfs/smbfs_subr.c
+++ sys/fs/smbfs/smbfs_subr.c
@@ -29,13 +29,13 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/smbfs/smbfs_subr.c,v 1.8 2005/05/06 20:47:09 anholt Exp $
+ * $FreeBSD: src/sys/fs/smbfs/smbfs_subr.c,v 1.11 2006/10/24 11:55:18 phk Exp $
  */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/clock.h>
 #include <sys/malloc.h>
-#include <machine/clock.h>
 #include <sys/time.h>
 #include <sys/vnode.h>
 #include <sys/sysctl.h>
@@ -51,59 +51,7 @@
 #include <fs/smbfs/smbfs_node.h>
 #include <fs/smbfs/smbfs_subr.h>
 
-MALLOC_DEFINE(M_SMBFSDATA, "SMBFS data", "SMBFS private data");
-
-/* 
- * Time & date conversion routines taken from msdosfs. Although leap
- * year calculation is bogus, it's sufficient before 2100 :)
- */
-/*
- * This is the format of the contents of the deTime field in the direntry
- * structure.
- * We don't use bitfields because we don't know how compilers for
- * arbitrary machines will lay them out.
- */
-#define DT_2SECONDS_MASK	0x1F	/* seconds divided by 2 */
-#define DT_2SECONDS_SHIFT	0
-#define DT_MINUTES_MASK		0x7E0	/* minutes */
-#define DT_MINUTES_SHIFT	5
-#define DT_HOURS_MASK		0xF800	/* hours */
-#define DT_HOURS_SHIFT		11
-
-/*
- * This is the format of the contents of the deDate field in the direntry
- * structure.
- */
-#define DD_DAY_MASK		0x1F	/* day of month */
-#define DD_DAY_SHIFT		0
-#define DD_MONTH_MASK		0x1E0	/* month */
-#define DD_MONTH_SHIFT		5
-#define DD_YEAR_MASK		0xFE00	/* year - 1980 */
-#define DD_YEAR_SHIFT		9
-/*
- * Total number of days that have passed for each month in a regular year.
- */
-static u_short regyear[] = {
-	31, 59, 90, 120, 151, 181,
-	212, 243, 273, 304, 334, 365
-};
-
-/*
- * Total number of days that have passed for each month in a leap year.
- */
-static u_short leapyear[] = {
-	31, 60, 91, 121, 152, 182,
-	213, 244, 274, 305, 335, 366
-};
-
-/*
- * Variables used to remember parts of the last time conversion.  Maybe we
- * can avoid a full conversion.
- */
-static u_long  lasttime;
-static u_long  lastday;
-static u_short lastddate;
-static u_short lastdtime;
+MALLOC_DEFINE(M_SMBFSDATA, "smbfs_data", "SMBFS private data");
 
 void
 smb_time_local2server(struct timespec *tsp, int tzoff, u_long *seconds)
@@ -146,112 +94,22 @@
 smb_time_unix2dos(struct timespec *tsp, int tzoff, u_int16_t *ddp, 
 	u_int16_t *dtp,	u_int8_t *dhp)
 {
-	u_long t, days, year, month, inc;
-	u_short *months;
+	struct timespec tt;
+	u_long t;
 
-	/*
-	 * If the time from the last conversion is the same as now, then
-	 * skip the computations and use the saved result.
-	 */
+	tt = *tsp;
 	smb_time_local2server(tsp, tzoff, &t);
-	t &= ~1;
-	if (lasttime != t) {
-		lasttime = t;
-		lastdtime = (((t / 2) % 30) << DT_2SECONDS_SHIFT)
-		    + (((t / 60) % 60) << DT_MINUTES_SHIFT)
-		    + (((t / 3600) % 24) << DT_HOURS_SHIFT);
-
-		/*
-		 * If the number of days since 1970 is the same as the last
-		 * time we did the computation then skip all this leap year
-		 * and month stuff.
-		 */
-		days = t / (24 * 60 * 60);
-		if (days != lastday) {
-			lastday = days;
-			for (year = 1970;; year++) {
-				inc = year & 0x03 ? 365 : 366;
-				if (days < inc)
-					break;
-				days -= inc;
-			}
-			months = year & 0x03 ? regyear : leapyear;
-			for (month = 0; days >= months[month]; month++)
-				;
-			if (month > 0)
-				days -= months[month - 1];
-			lastddate = ((days + 1) << DD_DAY_SHIFT)
-			    + ((month + 1) << DD_MONTH_SHIFT);
-			/*
-			 * Remember dos's idea of time is relative to 1980.
-			 * unix's is relative to 1970.  If somehow we get a
-			 * time before 1980 then don't give totally crazy
-			 * results.
-			 */
-			if (year > 1980)
-				lastddate += (year - 1980) << DD_YEAR_SHIFT;
-		}
-	}
-	if (dtp)
-		*dtp = lastdtime;
-	if (dhp)
-		*dhp = (tsp->tv_sec & 1) * 100 + tsp->tv_nsec / 10000000;
-
-	*ddp = lastddate;
+	tt.tv_sec = t;
+	timespec2fattime(&tt, 1, ddp, dtp, dhp);
 }
 
-/*
- * The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that
- * interval there were 8 regular years and 2 leap years.
- */
-#define	SECONDSTO1980	(((8 * 365) + (2 * 366)) * (24 * 60 * 60))
-
-static u_short lastdosdate;
-static u_long  lastseconds;
-
 void
 smb_dos2unixtime(u_int dd, u_int dt, u_int dh, int tzoff,
 	struct timespec *tsp)
 {
-	u_long seconds;
-	u_long month;
-	u_long year;
-	u_long days;
-	u_short *months;
-
-	if (dd == 0) {
-		tsp->tv_sec = 0;
-		tsp->tv_nsec = 0;
-		return;
-	}
-	seconds = (((dt & DT_2SECONDS_MASK) >> DT_2SECONDS_SHIFT) << 1)
-	    + ((dt & DT_MINUTES_MASK) >> DT_MINUTES_SHIFT) * 60
-	    + ((dt & DT_HOURS_MASK) >> DT_HOURS_SHIFT) * 3600
-	    + dh / 100;
-	/*
-	 * If the year, month, and day from the last conversion are the
-	 * same then use the saved value.
-	 */
-	if (lastdosdate != dd) {
-		lastdosdate = dd;
-		days = 0;
-		year = (dd & DD_YEAR_MASK) >> DD_YEAR_SHIFT;
-		days = year * 365;
-		days += year / 4 + 1;	/* add in leap days */
-		if ((year & 0x03) == 0)
-			days--;		/* if year is a leap year */
-		months = year & 0x03 ? regyear : leapyear;
-		month = (dd & DD_MONTH_MASK) >> DD_MONTH_SHIFT;
-		if (month < 1 || month > 12) {
-			month = 1;
-		}
-		if (month > 1)
-			days += months[month - 2];
-		days += ((dd & DD_DAY_MASK) >> DD_DAY_SHIFT) - 1;
-		lastseconds = (days * 24 * 60 * 60) + SECONDSTO1980;
-	}
-	smb_time_server2local(seconds + lastseconds, tzoff, tsp);
-	tsp->tv_nsec = (dh % 100) * 10000000;
+
+	fattime2timespec(dd, dt, dh, 1, tsp);
+	smb_time_server2local(tsp->tv_sec, tzoff, tsp);
 }
 
 static int
--- /dev/null
+++ sys/fs/tmpfs/tmpfs_subr.c
@@ -0,0 +1,1301 @@
+/*	$NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $	*/
+
+/*
+ * Copyright (c) 2005 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
+ * 2005 program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Efficient memory file system supporting functions.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/tmpfs/tmpfs_subr.c,v 1.12.2.1 2007/11/26 06:46:44 delphij Exp $");
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_extern.h>
+
+#include <fs/tmpfs/tmpfs.h>
+#include <fs/tmpfs/tmpfs_fifoops.h>
+#include <fs/tmpfs/tmpfs_vnops.h>
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Allocates a new node of type 'type' inside the 'tmp' mount point, with
+ * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
+ * using the credentials of the process 'p'.
+ *
+ * If the node type is set to 'VDIR', then the parent parameter must point
+ * to the parent directory of the node being created.  It may only be NULL
+ * while allocating the root node.
+ *
+ * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
+ * specifies the device the node represents.
+ *
+ * If the node type is set to 'VLNK', then the parameter target specifies
+ * the file name of the target file for the symbolic link that is being
+ * created.
+ *
+ * Note that new nodes are retrieved from the available list if it has
+ * items or, if it is empty, from the node pool as long as there is enough
+ * space to create them.
+ *
+ * Returns zero on success or an appropriate error code on failure.
+ */
+int
+tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
+    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
+    char *target, dev_t rdev, struct thread *p, struct tmpfs_node **node)
+{
+	struct tmpfs_node *nnode;
+
+	/* If the root directory of the 'tmp' file system is not yet
+	 * allocated, this must be the request to do it. */
+	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
+
+	MPASS(IFF(type == VLNK, target != NULL));
+	MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
+
+	/* Refuse once the limit is reached; '>' would admit one extra node. */
+	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
+		return (ENOSPC);
+
+	nnode = uma_zalloc_arg(tmp->tm_node_pool, tmp, M_WAITOK);
+
+	/* Generic initialization. */
+	nnode->tn_type = type;
+	vfs_timestamp(&nnode->tn_atime);
+	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
+	    nnode->tn_atime;
+	nnode->tn_uid = uid;
+	nnode->tn_gid = gid;
+	nnode->tn_mode = mode;
+	nnode->tn_id = alloc_unr(tmp->tm_ino_unr);
+
+	/* Type-specific initialization. */
+	switch (nnode->tn_type) {
+	case VBLK:
+	case VCHR:
+		nnode->tn_rdev = rdev;
+		break;
+
+	case VDIR:
+		TAILQ_INIT(&nnode->tn_dir.tn_dirhead);
+		MPASS(parent != nnode);
+		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
+		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
+		nnode->tn_dir.tn_readdir_lastn = 0;
+		nnode->tn_dir.tn_readdir_lastp = NULL;
+		nnode->tn_links++;
+		nnode->tn_dir.tn_parent->tn_links++;
+		break;
+
+	case VFIFO:
+		/* FALLTHROUGH */
+	case VSOCK:
+		break;
+
+	case VLNK:
+		MPASS(strlen(target) < MAXPATHLEN);
+		nnode->tn_size = strlen(target);
+		/* Only the name bytes are stored, without a NUL terminator. */
+		nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME, M_WAITOK);
+		memcpy(nnode->tn_link, target, nnode->tn_size);
+		break;
+
+	case VREG:
+		nnode->tn_reg.tn_aobj =
+		    vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0);
+		nnode->tn_reg.tn_aobj_pages = 0;
+		break;
+
+	default:
+		MPASS(0);
+	}
+
+	TMPFS_LOCK(tmp);
+	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
+	tmp->tm_nodes_inuse++;
+	TMPFS_UNLOCK(tmp);
+
+	*node = nnode;
+	return (0);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Destroys the node pointed to by node from the file system 'tmp'.
+ * If the node does not belong to the given mount point, the results are
+ * unpredictable.
+ *
+ * If the node references a directory; no entries are allowed because
+ * their removal could need a recursive algorithm, something forbidden in
+ * kernel space.  Furthermore, there is no need to provide such
+ * functionality (recursive removal) because the only primitives offered
+ * to the user are the removal of empty directories and the deletion of
+ * individual files.
+ *
+ * Note that nodes are not really deleted; in fact, when a node has been
+ * allocated, it cannot be deleted during the whole life of the file
+ * system.  Instead, they are moved to the available list and remain there
+ * until reused.
+ */
+void
+tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
+{
+	size_t freed_pages;
+
+#ifdef INVARIANTS
+	/* A node may only be destroyed once no vnode refers to it. */
+	TMPFS_NODE_LOCK(node);
+	MPASS(node->tn_vnode == NULL);
+	TMPFS_NODE_UNLOCK(node);
+#endif
+
+	/* Unhook the node from the mount's list of nodes in use. */
+	TMPFS_LOCK(tmp);
+	LIST_REMOVE(node, tn_entries);
+	tmp->tm_nodes_inuse--;
+	TMPFS_UNLOCK(tmp);
+
+	/* Release whatever type-specific storage the node owns. */
+	freed_pages = 0;
+	switch (node->tn_type) {
+	case VNON:
+		/* VNON lets the allocation routine clean up a partially
+		 * built node through this same path. */
+	case VBLK:
+	case VCHR:
+	case VDIR:
+	case VFIFO:
+	case VSOCK:
+		break;
+
+	case VLNK:
+		free(node->tn_link, M_TMPFSNAME);
+		break;
+
+	case VREG:
+		if (node->tn_reg.tn_aobj != NULL)
+			vm_object_deallocate(node->tn_reg.tn_aobj);
+		freed_pages = node->tn_reg.tn_aobj_pages;
+		break;
+
+	default:
+		MPASS(0);
+		break;
+	}
+
+	/* Hand the node's id back to its allocator and free the node. */
+	free_unr(tmp->tm_ino_unr, node->tn_id);
+	uma_zfree(tmp->tm_node_pool, node);
+
+	/* Credit the mount with the pages the node was holding. */
+	TMPFS_LOCK(tmp);
+	tmp->tm_pages_used -= freed_pages;
+	TMPFS_UNLOCK(tmp);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Allocates a new directory entry for the node node with a name of name.
+ * The new directory entry is returned in *de.
+ *
+ * The link count of node is increased by one to reflect the new object
+ * referencing it.
+ *
+ * Returns zero on success or an appropriate error code on failure.
+ */
+int
+tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
+    const char *name, uint16_t len, struct tmpfs_dirent **de)
+{
+	struct tmpfs_dirent *entry;
+
+	entry = uma_zalloc(tmp->tm_dirent_pool, M_WAITOK);
+	/* Keep a private copy of the name; exactly 'len' bytes are stored. */
+	entry->td_namelen = len;
+	entry->td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
+	memcpy(entry->td_name, name, len);
+
+	/* The entry counts as one more link to the node. */
+	entry->td_node = node;
+	node->tn_links++;
+
+	*de = entry;
+	return (0);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Frees a directory entry.  It is the caller's responsibility to destroy
+ * the node referenced by it if needed.
+ *
+ * The link count of node is decreased by one to reflect the removal of an
+ * object that referenced it.  This only happens if 'node_exists' is true;
+ * otherwise the function will not access the node referred to by the
+ * directory entry, as it may already have been released from the outside.
+ */
+void
+tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
+    boolean_t node_exists)
+{
+	struct tmpfs_node *target;
+
+	/* Touch the node only when the caller says it still exists. */
+	if (node_exists) {
+		target = de->td_node;
+		MPASS(target->tn_links > 0);
+		target->tn_links--;
+	}
+
+	free(de->td_name, M_TMPFSNAME);
+	uma_zfree(tmp->tm_dirent_pool, de);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Allocates a new vnode for the node node or returns a new reference to
+ * an existing one if the node had already a vnode referencing it.  The
+ * resulting locked vnode is returned in *vpp.
+ *
+ * Returns zero on success or an appropriate error code on failure.
+ */
+int
+tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
+    struct vnode **vpp, struct thread *td)
+{
+	int error = 0;
+	struct vnode *vp;
+
+loop:
+	TMPFS_NODE_LOCK(node);
+	if ((vp = node->tn_vnode) != NULL) {
+		VI_LOCK(vp);
+		TMPFS_NODE_UNLOCK(node);
+		vholdl(vp);
+		(void) vget(vp, lkflag | LK_INTERLOCK | LK_RETRY, td);
+		vdrop(vp);
+
+		/*
+		 * Make sure the vnode is still there after
+		 * getting the interlock to avoid racing a free.
+		 */
+		if (node->tn_vnode == NULL || node->tn_vnode != vp) {
+			vput(vp);
+			goto loop;
+		}
+
+		goto out;
+	}
+
+	/*
+	 * otherwise lock the vp list while we call getnewvnode
+	 * since that can block.
+	 */
+	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
+		node->tn_vpstate |= TMPFS_VNODE_WANT;
+		error = msleep((caddr_t) &node->tn_vpstate,
+		    TMPFS_NODE_MTX(node), PDROP | PCATCH, "tmpfs_alloc_vp", 0);
+		if (error) {
+			*vpp = NULL;	/* interrupted; nothing to return */
+			return error;
+		}
+		goto loop;
+	} else
+		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
+
+	TMPFS_NODE_UNLOCK(node);
+
+	/* Get a new vnode and associate it with our node. */
+	error = getnewvnode("tmpfs", mp, &tmpfs_vnodeop_entries, &vp);
+	if (error != 0)
+		goto unlock;
+	MPASS(vp != NULL);
+
+	(void) vn_lock(vp, lkflag | LK_RETRY, td);
+
+	vp->v_data = node;
+	vp->v_type = node->tn_type;
+
+	/* Type-specific initialization. */
+	switch (node->tn_type) {
+	case VBLK:
+		/* FALLTHROUGH */
+	case VCHR:
+		/* FALLTHROUGH */
+	case VLNK:
+		/* FALLTHROUGH */
+	case VREG:
+		/* FALLTHROUGH */
+	case VSOCK:
+		break;
+	case VFIFO:
+		vp->v_op = &tmpfs_fifoop_entries;
+		break;
+	case VDIR:
+		if (node->tn_dir.tn_parent == node)
+			vp->v_vflag |= VV_ROOT;
+		break;
+
+	default:
+		MPASS(0);
+	}
+
+	vnode_pager_setsize(vp, node->tn_size);
+	error = insmntque(vp, mp);
+	if (error) {
+		/* insmntque() has already called vgone() and vput() on the
+		 * vnode when it fails; releasing it again is a double put. */
+		vp = NULL;
+	}
+
+unlock:
+	TMPFS_NODE_LOCK(node);
+
+	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
+	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
+	node->tn_vnode = vp;
+
+	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
+		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
+		TMPFS_NODE_UNLOCK(node);
+		wakeup((caddr_t) &node->tn_vpstate);
+	} else
+		TMPFS_NODE_UNLOCK(node);
+
+out:
+	*vpp = vp;
+
+	MPASS(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp, td)));
+#ifdef INVARIANTS
+	TMPFS_NODE_LOCK(node);
+	MPASS(*vpp == node->tn_vnode);
+	TMPFS_NODE_UNLOCK(node);
+#endif
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Breaks the link between the vnode 'vp' and its tmpfs node: the node's
+ * vnode pointer and the vnode's private data pointer are both cleared.
+ */
+void
+tmpfs_free_vp(struct vnode *vp)
+{
+	struct tmpfs_node *tnode = VP_TO_TMPFS_NODE(vp);
+
+	/*
+	 * Both back pointers are reset while the node lock is held.
+	 */
+	TMPFS_NODE_LOCK(tnode);
+	tnode->tn_vnode = NULL;
+	vp->v_data = NULL;
+	TMPFS_NODE_UNLOCK(tnode);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Allocates a new file of type 'vap->va_type' and adds it to the parent
+ * directory 'dvp' under the component name given in 'cnp'.  The owner is
+ * taken from the caller's credentials (through 'cnp'), the group from the
+ * parent directory and the mode from 'vap'.  'target' is passed through
+ * to tmpfs_alloc_node() (presumably the symlink target -- confirm there).
+ * If successful, *vpp holds a vnode to the newly created file and zero
+ * is returned.  Otherwise *vpp is NULL and the function returns an
+ * appropriate error code.
+ */
+int
+tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
+    struct componentname *cnp, char *target)
+{
+	int error;
+	struct tmpfs_dirent *de;
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *dnode;
+	struct tmpfs_node *node;
+	struct tmpfs_node *parent;
+
+	MPASS(VOP_ISLOCKED(dvp, cnp->cn_thread));
+	MPASS(cnp->cn_flags & HASBUF);
+
+	tmp = VFS_TO_TMPFS(dvp->v_mount);
+	dnode = VP_TO_TMPFS_DIR(dvp);
+	*vpp = NULL;
+
+	/* If the entry we are creating is a directory, we cannot overflow
+	 * the number of links of its parent, because it will get a new
+	 * link. */
+	if (vap->va_type == VDIR) {
+		/* Ensure that we do not overflow the maximum number of links
+		 * imposed by the system. */
+		MPASS(dnode->tn_links <= LINK_MAX);
+		if (dnode->tn_links == LINK_MAX) {
+			error = EMLINK;
+			goto out;
+		}
+
+		parent = dnode;
+		MPASS(parent != NULL);
+	} else
+		parent = NULL;
+
+	/* Allocate a node that represents the new file. */
+	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
+	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
+	    cnp->cn_thread, &node);
+	if (error != 0)
+		goto out;
+
+	/* Allocate a directory entry that points to the new file. */
+	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
+	    &de);
+	if (error != 0) {
+		tmpfs_free_node(tmp, node);
+		goto out;
+	}
+
+	/* Allocate a vnode for the new file. */
+	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp,
+	    cnp->cn_thread);
+	if (error != 0) {
+		tmpfs_free_dirent(tmp, de, TRUE);
+		tmpfs_free_node(tmp, node);
+		goto out;
+	}
+
+	/* Now that all required items are allocated, we can proceed to
+	 * insert the new node into the directory, an operation that
+	 * cannot fail. */
+	tmpfs_dir_attach(dvp, de);
+
+out:
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Appends the directory entry 'de' to the entry list of the directory
+ * behind 'vp', which must be exclusively locked.  The link count of the
+ * entry's node is not touched; that is left to tmpfs_alloc_dirent.
+ */
+void
+tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
+{
+	struct tmpfs_node *dir;
+
+	ASSERT_VOP_ELOCKED(vp, __func__);
+	dir = VP_TO_TMPFS_DIR(vp);
+	TAILQ_INSERT_TAIL(&dir->tn_dir.tn_dirhead, de, td_entries);
+	dir->tn_size += sizeof(*de);
+	dir->tn_status |=
+	    TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Unlinks the directory entry 'de' from the entry list of the directory
+ * behind 'vp', which must be exclusively locked.  The link count of the
+ * entry's node is not touched; that is left to tmpfs_free_dirent.
+ */
+void
+tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
+{
+	struct tmpfs_node *dir;
+
+	ASSERT_VOP_ELOCKED(vp, __func__);
+	dir = VP_TO_TMPFS_DIR(vp);
+
+	/* Invalidate the readdir cache if it points at the departing entry. */
+	if (de == dir->tn_dir.tn_readdir_lastp) {
+		dir->tn_dir.tn_readdir_lastn = 0;
+		dir->tn_dir.tn_readdir_lastp = NULL;
+	}
+	TAILQ_REMOVE(&dir->tn_dir.tn_dirhead, de, td_entries);
+	dir->tn_size -= sizeof(*de);
+	dir->tn_status |=
+	    TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Searches the directory 'node' for an entry whose name matches the
+ * component described by 'cnp'.  The names . and .. must not be passed
+ * in, since they are not stored as real entries in the directory.
+ *
+ * Returns the matching entry, or NULL when no entry carries that name.
+ * The directory is flagged as accessed in either case.
+ */
+struct tmpfs_dirent *
+tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
+{
+	struct tmpfs_dirent *de;
+
+	MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
+	MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
+	    cnp->cn_nameptr[1] == '.')));
+	TMPFS_VALIDATE_DIR(node);
+
+	/*
+	 * 'de' ends up NULL when TAILQ_FOREACH runs off the end of the list.
+	 */
+	TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
+		MPASS(cnp->cn_namelen < 0xffff);
+		if (de->td_namelen != (uint16_t)cnp->cn_namelen)
+			continue;
+		if (memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0)
+			break;
+	}
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+
+	return de;
+}
+
+struct tmpfs_dirent *
+tmpfs_dir_search(struct tmpfs_node *node, struct tmpfs_node *f)
+{
+	struct tmpfs_dirent *cur;
+
+	/* Finds the entry of directory 'node' that refers to node 'f'. */
+	TMPFS_VALIDATE_DIR(node);
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+	cur = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
+	while (cur != NULL && cur->td_node != f)
+		cur = TAILQ_NEXT(cur, td_entries);
+	return (cur);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
+ * directory and returns it in the uio space.  The entry is built from a
+ * zeroed dirent so that the name padding copied out with it never holds
+ * stale kernel stack bytes.  Returns 0 on success, -1 if the uio space
+ * cannot hold the entry, or an error code if the copy itself fails.
+ */
+int
+tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
+{
+	int error;
+	struct dirent dent;
+
+	TMPFS_VALIDATE_DIR(node);
+	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
+
+	bzero(&dent, sizeof(dent));	/* also supplies d_name's NUL */
+	dent.d_fileno = node->tn_id;
+	dent.d_type = DT_DIR;
+	dent.d_namlen = 1;
+	dent.d_name[0] = '.';
+	dent.d_reclen = GENERIC_DIRSIZ(&dent);
+
+	if (dent.d_reclen > uio->uio_resid)
+		error = -1;
+	else {
+		error = uiomove(&dent, dent.d_reclen, uio);
+		if (error == 0)
+			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
+	}
+
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
+ * directory and returns it in the uio space.  The entry is built from a
+ * zeroed dirent so that the name padding copied out with it never holds
+ * stale kernel stack bytes.  Returns 0 on success, -1 if the uio space
+ * cannot hold the entry, or an error code if the copy itself fails.
+ */
+int
+tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
+{
+	int error;
+	struct dirent dent;
+
+	TMPFS_VALIDATE_DIR(node);
+	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
+
+	bzero(&dent, sizeof(dent));	/* also supplies d_name's NUL */
+	dent.d_fileno = node->tn_dir.tn_parent->tn_id;
+	dent.d_type = DT_DIR;
+	dent.d_namlen = 2;
+	dent.d_name[0] = '.';
+	dent.d_name[1] = '.';
+	dent.d_reclen = GENERIC_DIRSIZ(&dent);
+
+	if (dent.d_reclen > uio->uio_resid)
+		error = -1;
+	else {
+		error = uiomove(&dent, dent.d_reclen, uio);
+		if (error == 0) {
+			struct tmpfs_dirent *de;
+
+			de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
+			if (de == NULL)
+				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
+			else
+				uio->uio_offset = tmpfs_dircookie(de);
+		}
+	}
+
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Returns the entry of directory 'node' whose cookie equals 'cookie', or
+ * NULL if there is none; the cached readdir position is tried first.
+ */
+struct tmpfs_dirent *
+tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
+{
+	struct tmpfs_dirent *cached, *de;
+
+	/* Fast path: the position remembered by the previous readdir. */
+	cached = node->tn_dir.tn_readdir_lastp;
+	if (cached != NULL && cookie == node->tn_dir.tn_readdir_lastn)
+		return cached;
+
+	/* Slow path: walk the whole entry list. */
+	TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
+		if (tmpfs_dircookie(de) == cookie)
+			return de;
+	}
+	return NULL;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Helper function for tmpfs_readdir.  Returns as many directory entries
+ * as fit in the uio space, starting at uio->uio_offset; *cntp is bumped
+ * for every entry handed to uiomove().  Entries are built from a zeroed
+ * dirent so no stale kernel stack bytes are copied out.  Returns 0 on
+ * success, -1 if uio space ran out, or an error code (EINVAL: bad cookie).
+ */
+int
+tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
+{
+	int error;
+	off_t startcookie;
+	struct tmpfs_dirent *de;
+
+	TMPFS_VALIDATE_DIR(node);
+
+	/* Locate the first directory entry we have to return.  We have cached
+	 * the last readdir in the node, so use those values if appropriate.
+	 * Otherwise do a linear scan to find the requested entry. */
+	startcookie = uio->uio_offset;
+	MPASS(startcookie != TMPFS_DIRCOOKIE_DOT);
+	MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
+	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
+		return 0;
+	} else {
+		de = tmpfs_dir_lookupbycookie(node, startcookie);
+	}
+	if (de == NULL) {
+		return EINVAL;
+	}
+
+	/* Read as many entries as possible; i.e., until we reach the end of
+	 * the directory or we exhaust uio space. */
+	do {
+		struct dirent d;
+
+		/* Build the dirent from zeroes: its pad bytes reach user space. */
+		bzero(&d, sizeof(d));
+		d.d_fileno = de->td_node->tn_id;
+		switch (de->td_node->tn_type) {
+		case VBLK:
+			d.d_type = DT_BLK;
+			break;
+
+		case VCHR:
+			d.d_type = DT_CHR;
+			break;
+
+		case VDIR:
+			d.d_type = DT_DIR;
+			break;
+
+		case VFIFO:
+			d.d_type = DT_FIFO;
+			break;
+
+		case VLNK:
+			d.d_type = DT_LNK;
+			break;
+
+		case VREG:
+			d.d_type = DT_REG;
+			break;
+
+		case VSOCK:
+			d.d_type = DT_SOCK;
+			break;
+
+		default:
+			MPASS(0);	/* without the assertion d_type stays zeroed */
+		}
+		d.d_namlen = de->td_namelen;
+		MPASS(de->td_namelen < sizeof(d.d_name));
+		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
+		d.d_name[de->td_namelen] = '\0';
+		d.d_reclen = GENERIC_DIRSIZ(&d);
+
+		/* Stop reading if the directory entry we are treating is
+		 * bigger than the amount of data that can be returned. */
+		if (d.d_reclen > uio->uio_resid) {
+			error = -1;
+			break;
+		}
+
+		/* Copy the new dirent structure into the output buffer and
+		 * advance pointers. */
+		error = uiomove(&d, d.d_reclen, uio);
+
+		(*cntp)++;
+		de = TAILQ_NEXT(de, td_entries);
+	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
+
+	/* Update the offset and cache. */
+	if (de == NULL) {
+		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
+		node->tn_dir.tn_readdir_lastn = 0;
+		node->tn_dir.tn_readdir_lastp = NULL;
+	} else {
+		node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de);
+		node->tn_dir.tn_readdir_lastp = de;
+	}
+
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Resizes the aobj associated to the regular file pointed to by vp to
+ * the size newsize.  'vp' must point to a vnode that represents a regular
+ * file.  'newsize' must not be negative.
+ *
+ * Returns zero on success, or ENOSPC if the mount lacks the extra pages.
+ */
+int
+tmpfs_reg_resize(struct vnode *vp, off_t newsize)
+{
+	int error;
+	size_t newpages, oldpages;
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *node;
+	off_t oldsize;
+
+	MPASS(vp->v_type == VREG);
+	MPASS(newsize >= 0);
+
+	node = VP_TO_TMPFS_NODE(vp);
+	tmp = VFS_TO_TMPFS(vp->v_mount);
+
+	/* Convert the old and new sizes to the number of pages needed to
+	 * store them.  It may happen that we do not need to do anything
+	 * because the last allocated page can accommodate the change on
+	 * its own. */
+	oldsize = node->tn_size;
+	oldpages = round_page(oldsize) / PAGE_SIZE;
+	MPASS(oldpages == node->tn_reg.tn_aobj_pages);
+	newpages = round_page(newsize) / PAGE_SIZE;
+
+	if (newpages > oldpages &&
+	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
+		error = ENOSPC;
+		goto out;
+	}
+
+	node->tn_reg.tn_aobj_pages = newpages;
+
+	TMPFS_LOCK(tmp);
+	tmp->tm_pages_used += (newpages - oldpages);
+	TMPFS_UNLOCK(tmp);
+
+	node->tn_size = newsize;
+	vnode_pager_setsize(vp, newsize);
+	if (newsize < oldsize) {
+		size_t zerolen = round_page(newsize) - newsize;
+		vm_object_t uobj = node->tn_reg.tn_aobj;
+		vm_page_t m;
+
+		/*
+		 * free the swap space and pages beyond the new end of file
+		 */
+		VM_OBJECT_LOCK(uobj);
+		if (newpages < oldpages) {
+			swap_pager_freespace(uobj,
+						newpages, oldpages - newpages);
+			vm_object_page_remove(uobj,
+				OFF_TO_IDX(newsize + PAGE_MASK), 0, FALSE);
+		}
+
+		/*
+		 * zero out the truncated part of the last page.
+		 */
+
+		if (zerolen > 0) {
+			m = vm_page_grab(uobj, OFF_TO_IDX(newsize),
+					VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+			pmap_zero_page_area(m, PAGE_SIZE - zerolen,
+				zerolen);
+			vm_page_wakeup(m);
+		}
+		VM_OBJECT_UNLOCK(uobj);
+
+	}
+
+	error = 0;
+
+out:
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Change flags of the given vnode.
+ * Caller should execute tmpfs_update on vp after a successful execution.
+ * The vnode must be locked on entry and remain locked on exit.
+ */
+int
+tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct thread *p)
+{
+	int error;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* Disallow this operation if the file system is mounted read-only. */
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		return EROFS;
+
+	/*
+	 * Callers may only modify the file flags on objects they
+	 * have VADMIN rights for.
+	 */
+	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
+		return (error);
+	/*
+	 * A zero return from priv_check_cred() means privileged.  Others may
+	 * change only UF_SETTABLE bits, and nothing once a system flag is set.
+	 */
+	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
+		if (node->tn_flags
+		  & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
+			error = securelevel_gt(cred, 0);
+			if (error)
+				return (error);
+		}
+		/* Snapshot flag cannot be set or cleared */
+		if (((flags & SF_SNAPSHOT) != 0 &&
+		  (node->tn_flags & SF_SNAPSHOT) == 0) ||
+		  ((flags & SF_SNAPSHOT) == 0 &&
+		  (node->tn_flags & SF_SNAPSHOT) != 0))
+			return (EPERM);
+		node->tn_flags = flags;
+	} else {
+		if (node->tn_flags
+		  & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
+		  (flags & UF_SETTABLE) != flags)
+			return (EPERM);
+		node->tn_flags &= SF_SETTABLE;
+		node->tn_flags |= (flags & UF_SETTABLE);
+	}
+	node->tn_status |= TMPFS_NODE_CHANGED;
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Change access mode on the given vnode.
+ * Caller should execute tmpfs_update on vp after a successful execution.
+ * The vnode must be locked on entry and remain locked on exit.
+ */
+int
+tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p)
+{
+	int error;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* Disallow this operation if the file system is mounted read-only. */
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		return EROFS;
+
+	/* Immutable or append-only files cannot be modified, either. */
+	if (node->tn_flags & (IMMUTABLE | APPEND))
+		return EPERM;
+
+	/*
+	 * To modify the permissions on a file, must possess VADMIN
+	 * for that file.
+	 */
+	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
+		return (error);
+
+	/*
+	 * Privileged processes may set the sticky bit on non-directories,
+	 * as well as set the setgid bit on a file with a group that the
+	 * process is not a member of.
+	 */
+	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
+		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
+			return (EFTYPE);
+	}
+	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
+		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
+		if (error)
+			return (error);
+	}
+
+	/* Swap in the new permission bits; bits outside ALLPERMS are kept. */
+	node->tn_mode &= ~ALLPERMS;
+	node->tn_mode |= mode & ALLPERMS;
+
+	node->tn_status |= TMPFS_NODE_CHANGED;
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Change ownership of the given vnode.  At least one of uid or gid must
+ * be different from VNOVAL.  If one is set to that value, the attribute
+ * is unchanged.
+ * Caller should execute tmpfs_update on vp after a successful execution.
+ * The vnode must be locked on entry and remain locked on exit.
+ */
+int
+tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
+    struct thread *p)
+{
+	int error;
+	struct tmpfs_node *node;
+	uid_t ouid;
+	gid_t ogid;
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* Assign default values if they are unknown. */
+	MPASS(uid != VNOVAL || gid != VNOVAL);
+	if (uid == VNOVAL)
+		uid = node->tn_uid;
+	if (gid == VNOVAL)
+		gid = node->tn_gid;
+	MPASS(uid != VNOVAL && gid != VNOVAL);
+
+	/* Disallow this operation if the file system is mounted read-only. */
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		return EROFS;
+
+	/* Immutable or append-only files cannot be modified, either. */
+	if (node->tn_flags & (IMMUTABLE | APPEND))
+		return EPERM;
+
+	/*
+	 * To modify the ownership of a file, must possess VADMIN for that
+	 * file.
+	 */
+	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
+		return (error);
+
+	/*
+	 * To change the owner of a file, or change the group of a file to a
+	 * group of which we are not a member, the caller must have
+	 * privilege.
+	 */
+	if ((uid != node->tn_uid ||
+	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
+	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
+		return (error);
+
+	ogid = node->tn_gid;
+	ouid = node->tn_uid;
+
+	node->tn_uid = uid;
+	node->tn_gid = gid;
+
+	node->tn_status |= TMPFS_NODE_CHANGED;
+	/* On an ownership change, drop set-id bits unless the caller may keep them. */
+	if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) {
+		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0))
+			node->tn_mode &= ~(S_ISUID | S_ISGID);
+	}
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Change size of the given vnode.
+ * Caller should execute tmpfs_update on vp after a successful execution.
+ * The vnode must be locked on entry and remain locked on exit.
+ */
+int
+tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
+    struct thread *p)
+{
+	int error;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* Decide whether this is a valid operation based on the file type. */
+	error = 0;
+	switch (vp->v_type) {
+	case VDIR:
+		return EISDIR;
+
+	case VREG:
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return EROFS;
+		break;
+
+	case VBLK:
+		/* FALLTHROUGH */
+	case VCHR:
+		/* FALLTHROUGH */
+	case VFIFO:
+		/* Allow modifications of special files even if the file
+		 * system is mounted read-only (we are not modifying the
+		 * files themselves, but the objects they represent). */
+		return 0;
+
+	default:
+		/* Anything else is unsupported. */
+		return EOPNOTSUPP;
+	}
+
+	/* Immutable or append-only files cannot be modified, either. */
+	if (node->tn_flags & (IMMUTABLE | APPEND))
+		return EPERM;
+
+	error = tmpfs_truncate(vp, size);
+	/* tmpfs_truncate updates tn_status and the timestamps for us; there
+	 * is no need to do that here. */
+
+	MPASS(VOP_ISLOCKED(vp, p));
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Change access, modification and birth times of the given vnode.
+ * Caller should execute tmpfs_update on vp after a successful execution.
+ * The vnode must be locked on entry and remain locked on exit.
+ */
+int
+tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
+	struct timespec *birthtime, int vaflags, struct ucred *cred, struct thread *l)
+{
+	int error;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, l));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* Disallow this operation if the file system is mounted read-only. */
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		return EROFS;
+
+	/* Immutable or append-only files cannot be modified, either. */
+	if (node->tn_flags & (IMMUTABLE | APPEND))
+		return EPERM;
+
+	/* Determine if the user have proper privilege to update time. */
+	if (vaflags & VA_UTIMES_NULL) {
+		error = VOP_ACCESS(vp, VADMIN, cred, l);
+		if (error)
+			error = VOP_ACCESS(vp, VWRITE, cred, l);
+	} else
+		error = VOP_ACCESS(vp, VADMIN, cred, l);
+	if (error)
+		return (error);
+
+	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
+		node->tn_status |= TMPFS_NODE_ACCESSED;
+
+	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
+		node->tn_status |= TMPFS_NODE_MODIFIED;
+	/* A birth time counts as supplied only when both fields are set. */
+	if (birthtime->tv_sec != VNOVAL && birthtime->tv_nsec != VNOVAL)
+		node->tn_status |= TMPFS_NODE_MODIFIED;
+	/* NOTE(review): looks like a VNOVAL mtime is stored if only birthtime is set. */
+	tmpfs_itimes(vp, atime, mtime);
+
+	if (birthtime->tv_sec != VNOVAL && birthtime->tv_nsec != VNOVAL)
+		node->tn_birthtime = *birthtime;
+	MPASS(VOP_ISLOCKED(vp, l));
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+/*
+ * Folds the pending TMPFS_NODE_* status bits of vp's node into its access,
+ * modification and change times, then clears those bits.
+ */
+void
+tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
+    const struct timespec *mod)
+{
+	struct tmpfs_node *node;
+	struct timespec now;
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* Nothing to do unless an access, modify or change is pending. */
+	if (!(node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
+	    TMPFS_NODE_CHANGED)))
+		return;
+
+	/* A NULL 'acc' or 'mod' means "use the current time". */
+	vfs_timestamp(&now);
+	if (node->tn_status & TMPFS_NODE_ACCESSED)
+		node->tn_atime = *(acc != NULL ? acc : &now);
+	if (node->tn_status & TMPFS_NODE_MODIFIED)
+		node->tn_mtime = *(mod != NULL ? mod : &now);
+	if (node->tn_status & TMPFS_NODE_CHANGED)
+		node->tn_ctime = now;
+
+	/* The pending bits have been consumed. */
+	node->tn_status &=
+	    ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
+}
+
+/* --------------------------------------------------------------------- */
+
+void
+tmpfs_update(struct vnode *vp)
+{
+	/* NULL times: tmpfs_itimes() stamps pending fields with the current time. */
+	tmpfs_itimes(vp, NULL, NULL);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Resizes the regular file behind 'vp' to 'length' bytes.  Returns 0,
+ * EINVAL (negative length), EFBIG (above tm_maxfilesize) or the error
+ * from tmpfs_reg_resize().  Except for EFBIG, tmpfs_update() runs on exit.
+ */
+int
+tmpfs_truncate(struct vnode *vp, off_t length)
+{
+	int error;
+	struct tmpfs_node *node;
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	if (length < 0) {
+		error = EINVAL;
+		goto out;
+	}
+	if (node->tn_size == length) {
+		error = 0;
+		goto out;
+	}
+	if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
+		return (EFBIG);
+
+	error = tmpfs_reg_resize(vp, length);
+	if (error == 0)
+		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
+
+out:
+	tmpfs_update(vp);
+
+	return error;
+}
--- /dev/null
+++ sys/fs/tmpfs/tmpfs_fifoops.c
@@ -0,0 +1,106 @@
+/*	$NetBSD: tmpfs_fifoops.c,v 1.5 2005/12/11 12:24:29 christos Exp $	*/
+
+/*
+ * Copyright (c) 2005 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
+ * 2005 program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * tmpfs vnode interface for named pipes.
+ */
+#include <sys/cdefs.h>
+ __FBSDID("$FreeBSD: src/sys/fs/tmpfs/tmpfs_fifoops.c,v 1.3 2007/06/28 02:39:31 delphij Exp $");
+
+#include <sys/param.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+
+#include <fs/tmpfs/tmpfs.h>
+#include <fs/tmpfs/tmpfs_fifoops.h>
+#include <fs/tmpfs/tmpfs_vnops.h>
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_fifo_kqfilter(struct vop_kqfilter_args *ap)
+{
+	struct vnode *vp;
+	struct tmpfs_node *node;
+
+	vp = ap->a_vp;
+	node = VP_TO_TMPFS_NODE(vp);
+	/* Flag the node according to the filter type, then defer to fifo_specops. */
+	switch (ap->a_kn->kn_filter){
+	case EVFILT_READ:
+		node->tn_status |= TMPFS_NODE_ACCESSED;
+		break;
+	case EVFILT_WRITE:
+		node->tn_status |= TMPFS_NODE_MODIFIED;
+		break;
+	}	/* any other filter leaves tn_status untouched */
+
+	return fifo_specops.vop_kqfilter(ap);
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_fifo_close(struct vop_close_args *v)
+{
+	struct tmpfs_node *node;
+	node = VP_TO_TMPFS_NODE(v->a_vp);
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+	/* Flush the pending status bits before fifo_specops closes the fifo. */
+	tmpfs_update(v->a_vp);
+	return fifo_specops.vop_close(v);
+}
+
+/*
+ * vnode operations vector used for fifos stored in a tmpfs file system.
+ */
+struct vop_vector tmpfs_fifoop_entries = {
+	.vop_default =			&fifo_specops,	/* presumably the fallback for ops not listed -- confirm */
+	.vop_close =			tmpfs_fifo_close,
+	.vop_reclaim =			tmpfs_reclaim,
+	.vop_access =			tmpfs_access,
+	.vop_getattr =			tmpfs_getattr,
+	.vop_setattr =			tmpfs_setattr,
+	.vop_kqfilter =			tmpfs_fifo_kqfilter,
+};
+
--- /dev/null
+++ sys/fs/tmpfs/tmpfs_vnops.c
@@ -0,0 +1,1497 @@
+/*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
+
+/*
+ * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
+ * 2005 program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * tmpfs vnode interface.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/tmpfs/tmpfs_vnops.c,v 1.11.2.1 2007/11/26 06:46:44 delphij Exp $");
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/lockf.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <sys/sched.h>
+#include <sys/sf_buf.h>
+#include <machine/_inttypes.h>
+
+#include <fs/fifofs/fifo.h>
+#include <fs/tmpfs/tmpfs_vnops.h>
+#include <fs/tmpfs/tmpfs.h>
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_lookup(struct vop_cachedlookup_args *v)
+{
+	struct vnode *dvp = v->a_dvp;
+	struct vnode **vpp = v->a_vpp;
+	struct componentname *cnp = v->a_cnp;
+	struct thread *td = cnp->cn_thread;
+
+	int error;
+	struct tmpfs_dirent *de;
+	struct tmpfs_node *dnode;
+
+	dnode = VP_TO_TMPFS_DIR(dvp);
+	*vpp = NULLVP;
+
+	/* Check accessibility of requested node as a first step. */
+	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
+	if (error != 0)
+		goto out;
+
+	/* We cannot be requesting the parent directory of the root node. */
+	MPASS(IMPLIES(dnode->tn_type == VDIR &&
+	    dnode->tn_dir.tn_parent == dnode,
+	    !(cnp->cn_flags & ISDOTDOT)));
+
+	if (cnp->cn_flags & ISDOTDOT) {
+		int ltype = 0;
+
+		ltype = VOP_ISLOCKED(dvp, td);
+		vhold(dvp);
+		VOP_UNLOCK(dvp, 0, td);
+		/* Allocate a new vnode on the matching entry. */
+		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
+		    cnp->cn_lkflags, vpp, td);
+
+		vn_lock(dvp, ltype | LK_RETRY, td);
+		vdrop(dvp);
+	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
+		VREF(dvp);
+		*vpp = dvp;
+		error = 0;
+	} else {
+		de = tmpfs_dir_lookup(dnode, cnp);
+		if (de == NULL) {
+			/* The entry was not found in the directory.
+			 * This is OK if we are creating or renaming an
+			 * entry and are working on the last component of
+			 * the path name. */
+			if ((cnp->cn_flags & ISLASTCN) &&
+			    (cnp->cn_nameiop == CREATE || \
+			    cnp->cn_nameiop == RENAME)) {
+				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+				    cnp->cn_thread);
+				if (error != 0)
+					goto out;
+
+				/* Keep the component name in the buffer for
+				 * future uses. */
+				cnp->cn_flags |= SAVENAME;
+
+				error = EJUSTRETURN;
+			} else
+				error = ENOENT;
+		} else {
+			struct tmpfs_node *tnode;
+
+			/* The entry was found, so get its associated
+			 * tmpfs_node. */
+			tnode = de->td_node;
+
+			/* If we are not at the last path component and
+			 * found an entry that is neither a directory nor
+			 * a symbolic link (which may itself be pointing
+			 * to a directory), raise an error. */
+			if ((tnode->tn_type != VDIR &&
+			    tnode->tn_type != VLNK) &&
+			    !(cnp->cn_flags & ISLASTCN)) {
+				error = ENOTDIR;
+				goto out;
+			}
+
+			/* If we are deleting or renaming the entry, keep
+			 * track of its tmpfs_dirent so that it can be
+			 * easily deleted later. */
+			if ((cnp->cn_flags & ISLASTCN) &&
+			    (cnp->cn_nameiop == DELETE ||
+			    cnp->cn_nameiop == RENAME)) {
+				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+				    cnp->cn_thread);
+				if (error != 0)
+					goto out;
+
+				/* Allocate a new vnode on the matching entry. */
+				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
+						cnp->cn_lkflags, vpp, td);
+				if (error != 0)
+					goto out;
+
+				if ((dnode->tn_mode & S_ISTXT) &&
+				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, cnp->cn_thread) &&
+				  VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, cnp->cn_thread)) {
+					error = EPERM;
+					vput(*vpp);
+					*vpp = NULL;
+					goto out;
+				}
+				cnp->cn_flags |= SAVENAME;
+			} else {
+				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
+						cnp->cn_lkflags, vpp, td);
+			}
+		}
+	}
+
+	/* Store the result of this lookup in the cache.  Avoid this if the
+	 * request was for creation, as it does not improve timings on
+	 * empirical tests. */
+	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE)
+		cache_enter(dvp, *vpp, cnp);
+
+out:
+	/* If there were no errors, *vpp cannot be null and it must be
+	 * locked. */
+	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp, td)));
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_create(struct vop_create_args *v)
+{
+	struct vnode *dvp = v->a_dvp;
+	struct vnode **vpp = v->a_vpp;
+	struct componentname *cnp = v->a_cnp;
+	struct vattr *vap = v->a_vap;
+
+	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
+
+	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
+}
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_mknod(struct vop_mknod_args *v)
+{
+	struct vnode *dvp = v->a_dvp;
+	struct vnode **vpp = v->a_vpp;
+	struct componentname *cnp = v->a_cnp;
+	struct vattr *vap = v->a_vap;
+
+	if (vap->va_type != VBLK && vap->va_type != VCHR &&
+	    vap->va_type != VFIFO)
+		return EINVAL;
+
+	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_open(struct vop_open_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	int mode = v->a_mode;
+
+	int error;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, v->a_td));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* The file is still active but all its names have been removed
+	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
+	 * it is about to die. */
+	if (node->tn_links < 1)
+		return (ENOENT);
+
+	/* If the file is marked append-only, deny write requests. */
+	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
+		error = EPERM;
+	else {
+		error = 0;
+		vnode_create_vobject(vp, node->tn_size, v->a_td);
+	}
+
+	MPASS(VOP_ISLOCKED(vp, v->a_td));
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_close(struct vop_close_args *v)
+{
+	struct vnode *vp = v->a_vp;
+
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, v->a_td));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	if (node->tn_links > 0) {
+		/* Update node times.  No need to do it if the node has
+		 * been deleted, because it will vanish after we return. */
+		tmpfs_update(vp);
+	}
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+int
+tmpfs_access(struct vop_access_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	int mode = v->a_mode;
+	struct ucred *cred = v->a_cred;
+
+	int error;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, v->a_td));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	switch (vp->v_type) {
+	case VDIR:
+		/* FALLTHROUGH */
+	case VLNK:
+		/* FALLTHROUGH */
+	case VREG:
+		if (mode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
+			error = EROFS;
+			goto out;
+		}
+		break;
+
+	case VBLK:
+		/* FALLTHROUGH */
+	case VCHR:
+		/* FALLTHROUGH */
+	case VSOCK:
+		/* FALLTHROUGH */
+	case VFIFO:
+		break;
+
+	default:
+		error = EINVAL;
+		goto out;
+	}
+
+	if (mode & VWRITE && node->tn_flags & IMMUTABLE) {
+		error = EPERM;
+		goto out;
+	}
+
+	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
+	    node->tn_gid, mode, cred, NULL);
+
+out:
+	MPASS(VOP_ISLOCKED(vp, v->a_td));
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+int
+tmpfs_getattr(struct vop_getattr_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	struct vattr *vap = v->a_vap;
+
+	struct tmpfs_node *node;
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	VATTR_NULL(vap);
+
+	tmpfs_update(vp);
+
+	vap->va_type = vp->v_type;
+	vap->va_mode = node->tn_mode;
+	vap->va_nlink = node->tn_links;
+	vap->va_uid = node->tn_uid;
+	vap->va_gid = node->tn_gid;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_fileid = node->tn_id;
+	vap->va_size = node->tn_size;
+	vap->va_blocksize = PAGE_SIZE;
+	vap->va_atime = node->tn_atime;
+	vap->va_mtime = node->tn_mtime;
+	vap->va_ctime = node->tn_ctime;
+	vap->va_birthtime = node->tn_birthtime;
+	vap->va_gen = node->tn_gen;
+	vap->va_flags = node->tn_flags;
+	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
+		node->tn_rdev : VNOVAL;
+	vap->va_bytes = round_page(node->tn_size);
+	vap->va_filerev = VNOVAL;
+	vap->va_vaflags = 0;
+	vap->va_spare = VNOVAL; /* XXX */
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/* XXX Should this operation be atomic?  I think it should, but code in
+ * XXX other places (e.g., ufs) doesn't seem to be... */
+int
+tmpfs_setattr(struct vop_setattr_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	struct vattr *vap = v->a_vap;
+	struct ucred *cred = v->a_cred;
+	struct thread *l = v->a_td;
+
+	int error;
+
+	MPASS(VOP_ISLOCKED(vp, l));
+
+	error = 0;
+
+	/* Abort if any unsettable attribute is given. */
+	if (vap->va_type != VNON ||
+	    vap->va_nlink != VNOVAL ||
+	    vap->va_fsid != VNOVAL ||
+	    vap->va_fileid != VNOVAL ||
+	    vap->va_blocksize != VNOVAL ||
+	    vap->va_gen != VNOVAL ||
+	    vap->va_rdev != VNOVAL ||
+	    vap->va_bytes != VNOVAL)
+		error = EINVAL;
+
+	if (error == 0 && (vap->va_flags != VNOVAL))
+		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
+
+	if (error == 0 && (vap->va_size != VNOVAL))
+		error = tmpfs_chsize(vp, vap->va_size, cred, l);
+
+	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
+		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred,
+		    l);
+
+	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
+		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
+
+	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
+	    vap->va_atime.tv_nsec != VNOVAL) ||
+	    (vap->va_mtime.tv_sec != VNOVAL &&
+	    vap->va_mtime.tv_nsec != VNOVAL) ||
+	    (vap->va_birthtime.tv_sec != VNOVAL &&
+	    vap->va_birthtime.tv_nsec != VNOVAL)))
+		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
+			&vap->va_birthtime, vap->va_vaflags, cred, l);
+
+	/* Update the node times.  We give preference to the error codes
+	 * generated by this function rather than the ones that may arise
+	 * from tmpfs_update. */
+	tmpfs_update(vp);
+
+	MPASS(VOP_ISLOCKED(vp, l));
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
+{
+	vm_pindex_t	idx;
+	vm_page_t	m;
+	struct sf_buf	*sf;
+	off_t		offset, addr;
+	size_t		tlen;
+	caddr_t		va;
+	int		error;
+
+	addr = uio->uio_offset;
+	idx = OFF_TO_IDX(addr);
+	offset = addr & PAGE_MASK;
+	tlen = MIN(PAGE_SIZE - offset, len);
+
+	if ((vobj == NULL) || (vobj->resident_page_count == 0))
+		goto nocache;
+
+	VM_OBJECT_LOCK(vobj);
+lookupvpg:
+	if (((m = vm_page_lookup(vobj, idx)) != NULL) &&
+	    vm_page_is_valid(m, offset, tlen)) {
+		if (vm_page_sleep_if_busy(m, FALSE, "tmfsmr"))
+			goto lookupvpg;
+		vm_page_busy(m);
+		VM_OBJECT_UNLOCK(vobj);
+		sched_pin();
+		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
+		va = (caddr_t)sf_buf_kva(sf);
+		error = uiomove(va + offset, tlen, uio);
+		sf_buf_free(sf);
+		sched_unpin();
+		VM_OBJECT_LOCK(vobj);
+		vm_page_wakeup(m);
+		VM_OBJECT_UNLOCK(vobj);
+		return	(error);
+	}
+	VM_OBJECT_UNLOCK(vobj);
+nocache:
+	VM_OBJECT_LOCK(tobj);
+	vm_object_pip_add(tobj, 1);
+	m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
+	    VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+	if (m->valid != VM_PAGE_BITS_ALL) {
+		int behind, ahead;
+		if (vm_pager_has_page(tobj, idx, &behind, &ahead)) {
+			error = vm_pager_get_pages(tobj, &m, 1, 0);
+			if (error != 0) {
+				printf("tmpfs get pages from pager error [read]\n");
+				goto out;
+			}
+		} else
+			vm_page_zero_invalid(m, TRUE);
+	}
+	VM_OBJECT_UNLOCK(tobj);
+	sched_pin();
+	sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
+	va = (caddr_t)sf_buf_kva(sf);
+	error = uiomove(va + offset, tlen, uio);
+	sf_buf_free(sf);
+	sched_unpin();
+	VM_OBJECT_LOCK(tobj);
+out:
+	vm_page_lock_queues();
+	vm_page_unwire(m, 0);
+	vm_page_activate(m);
+	vm_page_unlock_queues();
+	vm_page_wakeup(m);
+	vm_object_pip_subtract(tobj, 1);
+	VM_OBJECT_UNLOCK(tobj);
+
+	return	(error);
+}
+
+/* Read from a regular tmpfs file until the request is satisfied or EOF. */
+static int
+tmpfs_read(struct vop_read_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	struct uio *uio = v->a_uio;
+
+	struct tmpfs_node *node;
+	vm_object_t uobj;
+	size_t len;
+	int resid;
+	/* Must start at 0: a zero-length read never enters the loop below. */
+	int error = 0;
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	if (vp->v_type != VREG) {
+		error = EISDIR;
+		goto out;
+	}
+
+	if (uio->uio_offset < 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+
+	uobj = node->tn_reg.tn_aobj;
+	while ((resid = uio->uio_resid) > 0) {
+		/* Nothing left to read at or past the end of the file. */
+		if (node->tn_size <= uio->uio_offset)
+			break;
+		len = MIN(node->tn_size - uio->uio_offset, resid);
+		if (len == 0)
+			break;
+		error = tmpfs_mappedread(vp->v_object, uobj, len, uio);
+		if ((error != 0) || (resid == uio->uio_resid))
+			break;
+	}
+
+out:
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
+{
+	vm_pindex_t	idx;
+	vm_page_t	vpg, tpg;
+	struct sf_buf	*sf;
+	off_t		offset, addr;
+	size_t		tlen;
+	caddr_t		va;
+	int		error;
+
+	addr = uio->uio_offset;
+	idx = OFF_TO_IDX(addr);
+	offset = addr & PAGE_MASK;
+	tlen = MIN(PAGE_SIZE - offset, len);
+
+	if ((vobj == NULL) || (vobj->resident_page_count == 0)) {
+		vpg = NULL;
+		goto nocache;
+	}
+
+	VM_OBJECT_LOCK(vobj);
+lookupvpg:
+	if (((vpg = vm_page_lookup(vobj, idx)) != NULL) &&
+	    vm_page_is_valid(vpg, offset, tlen)) {
+		if (vm_page_sleep_if_busy(vpg, FALSE, "tmfsmw"))
+			goto lookupvpg;
+		vm_page_busy(vpg);
+		vm_page_lock_queues();
+		vm_page_undirty(vpg);
+		vm_page_unlock_queues();
+		VM_OBJECT_UNLOCK(vobj);
+		sched_pin();
+		sf = sf_buf_alloc(vpg, SFB_CPUPRIVATE);
+		va = (caddr_t)sf_buf_kva(sf);
+		error = uiomove(va + offset, tlen, uio);
+		sf_buf_free(sf);
+		sched_unpin();
+	} else {
+		VM_OBJECT_UNLOCK(vobj);
+		vpg = NULL;
+	}
+nocache:
+	VM_OBJECT_LOCK(tobj);
+	vm_object_pip_add(tobj, 1);
+	tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
+	    VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+	if (tpg->valid != VM_PAGE_BITS_ALL) {
+		int behind, ahead;
+		if (vm_pager_has_page(tobj, idx, &behind, &ahead)) {
+			error = vm_pager_get_pages(tobj, &tpg, 1, 0);
+			if (error != 0) {
+				printf("tmpfs get pages from pager error [write]\n");
+				goto out;
+			}
+		} else
+			vm_page_zero_invalid(tpg, TRUE);
+	}
+	VM_OBJECT_UNLOCK(tobj);
+	if (vpg == NULL) {
+		sched_pin();
+		sf = sf_buf_alloc(tpg, SFB_CPUPRIVATE);
+		va = (caddr_t)sf_buf_kva(sf);
+		error = uiomove(va + offset, tlen, uio);
+		sf_buf_free(sf);
+		sched_unpin();
+	} else {
+		KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid"));
+		pmap_copy_page(vpg, tpg);
+	}
+	VM_OBJECT_LOCK(tobj);
+out:
+	if (vobj != NULL)
+		VM_OBJECT_LOCK(vobj);
+	vm_page_lock_queues();
+	if (error == 0) {
+		vm_page_set_validclean(tpg, offset, tlen);
+		vm_page_zero_invalid(tpg, TRUE);
+		vm_page_dirty(tpg);
+	}
+	vm_page_unwire(tpg, 0);
+	vm_page_activate(tpg);
+	vm_page_unlock_queues();
+	vm_page_wakeup(tpg);
+	if (vpg != NULL)
+		vm_page_wakeup(vpg);
+	if (vobj != NULL)
+		VM_OBJECT_UNLOCK(vobj);
+	vm_object_pip_subtract(tobj, 1);
+	VM_OBJECT_UNLOCK(tobj);
+
+	return	(error);
+}
+
+static int
+tmpfs_write(struct vop_write_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	struct uio *uio = v->a_uio;
+	int ioflag = v->a_ioflag;
+	struct thread *td = uio->uio_td;
+
+	boolean_t extended;
+	int error = 0;
+	off_t oldsize;
+	struct tmpfs_node *node;
+	vm_object_t uobj;
+	size_t len;
+	int resid;
+
+	node = VP_TO_TMPFS_NODE(vp);
+	oldsize = node->tn_size;
+
+	if (uio->uio_offset < 0 || vp->v_type != VREG) {
+		error = EINVAL;
+		goto out;
+	}
+
+	if (uio->uio_resid == 0) {
+		error = 0;
+		goto out;
+	}
+
+	if (ioflag & IO_APPEND)
+		uio->uio_offset = node->tn_size;
+
+	if (uio->uio_offset + uio->uio_resid >
+	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
+		return (EFBIG);
+
+	if (vp->v_type == VREG && td != NULL) {
+		PROC_LOCK(td->td_proc);
+		if (uio->uio_offset + uio->uio_resid >
+		  lim_cur(td->td_proc, RLIMIT_FSIZE)) {
+			psignal(td->td_proc, SIGXFSZ);
+			PROC_UNLOCK(td->td_proc);
+			return (EFBIG);
+		}
+		PROC_UNLOCK(td->td_proc);
+	}
+
+	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
+	if (extended) {
+		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
+		if (error != 0)
+			goto out;
+	}
+
+	uobj = node->tn_reg.tn_aobj;
+	while ((resid = uio->uio_resid) > 0) {
+		if (node->tn_size <= uio->uio_offset)
+			break;
+		len = MIN(node->tn_size - uio->uio_offset, resid);
+		if (len == 0)
+			break;
+		error = tmpfs_mappedwrite(vp->v_object, uobj, len, uio);
+		if ((error != 0) || (resid == uio->uio_resid))
+			break;
+	}
+
+	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
+	    (extended ? TMPFS_NODE_CHANGED : 0);
+
+	if (node->tn_mode & (S_ISUID | S_ISGID)) {
+		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0))
+			node->tn_mode &= ~(S_ISUID | S_ISGID);
+	}
+
+	if (error != 0)
+		(void)tmpfs_reg_resize(vp, oldsize);
+
+out:
+	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
+	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_fsync(struct vop_fsync_args *v)
+{
+	struct vnode *vp = v->a_vp;
+
+	MPASS(VOP_ISLOCKED(vp, v->a_td));
+
+	tmpfs_update(vp);
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_remove(struct vop_remove_args *v)
+{
+	struct vnode *dvp = v->a_dvp;
+	struct vnode *vp = v->a_vp;
+
+	int error;
+	struct tmpfs_dirent *de;
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *dnode;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(dvp, v->a_cnp->cn_thread));
+	MPASS(VOP_ISLOCKED(vp, v->a_cnp->cn_thread));
+
+	if (vp->v_type == VDIR) {
+		error = EISDIR;
+		goto out;
+	}
+
+	dnode = VP_TO_TMPFS_DIR(dvp);
+	node = VP_TO_TMPFS_NODE(vp);
+	tmp = VFS_TO_TMPFS(vp->v_mount);
+	de = tmpfs_dir_search(dnode, node);
+	MPASS(de != NULL);
+
+	/* Files marked as immutable or append-only cannot be deleted. */
+	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
+	    (dnode->tn_flags & APPEND)) {
+		error = EPERM;
+		goto out;
+	}
+
+	/* Remove the entry from the directory; as it is a file, we do not
+	 * have to change the number of hard links of the directory. */
+	tmpfs_dir_detach(dvp, de);
+
+	/* Free the directory entry we just deleted.  Note that the node
+	 * referred by it will not be removed until the vnode is really
+	 * reclaimed. */
+	tmpfs_free_dirent(tmp, de, TRUE);
+
+	if (node->tn_links > 0)
+		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
+	    TMPFS_NODE_MODIFIED;
+	error = 0;
+
+out:
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_link(struct vop_link_args *v)
+{
+	struct vnode *dvp = v->a_tdvp;
+	struct vnode *vp = v->a_vp;
+	struct componentname *cnp = v->a_cnp;
+
+	int error;
+	struct tmpfs_dirent *de;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(dvp, cnp->cn_thread));
+	MPASS(cnp->cn_flags & HASBUF);
+	MPASS(dvp != vp); /* XXX When can this be false? */
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	/* XXX: Why aren't the following two tests done by the caller? */
+
+	/* Hard links of directories are forbidden. */
+	if (vp->v_type == VDIR) {
+		error = EPERM;
+		goto out;
+	}
+
+	/* Cannot create cross-device links. */
+	if (dvp->v_mount != vp->v_mount) {
+		error = EXDEV;
+		goto out;
+	}
+
+	/* Ensure that we do not overflow the maximum number of links imposed
+	 * by the system. */
+	MPASS(node->tn_links <= LINK_MAX);
+	if (node->tn_links == LINK_MAX) {
+		error = EMLINK;
+		goto out;
+	}
+
+	/* We cannot create links of files marked immutable or append-only. */
+	if (node->tn_flags & (IMMUTABLE | APPEND)) {
+		error = EPERM;
+		goto out;
+	}
+
+	/* Allocate a new directory entry to represent the node. */
+	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
+	    cnp->cn_nameptr, cnp->cn_namelen, &de);
+	if (error != 0)
+		goto out;
+
+	/* Insert the new directory entry into the appropriate directory. */
+	tmpfs_dir_attach(dvp, de);
+
+	/* vp link count has changed, so update node times. */
+	node->tn_status |= TMPFS_NODE_CHANGED;
+	tmpfs_update(vp);
+
+	error = 0;
+
+out:
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_rename(struct vop_rename_args *v)
+{
+	struct vnode *fdvp = v->a_fdvp;
+	struct vnode *fvp = v->a_fvp;
+	struct componentname *fcnp = v->a_fcnp;
+	struct vnode *tdvp = v->a_tdvp;
+	struct vnode *tvp = v->a_tvp;
+	struct componentname *tcnp = v->a_tcnp;
+
+	char *newname;
+	int error;
+	struct tmpfs_dirent *de;
+	struct tmpfs_node *fdnode;
+	struct tmpfs_node *fnode;
+	struct tmpfs_node *tnode;
+	struct tmpfs_node *tdnode;
+
+	MPASS(VOP_ISLOCKED(tdvp, tcnp->cn_thread));
+	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp, tcnp->cn_thread)));
+	MPASS(fcnp->cn_flags & HASBUF);
+	MPASS(tcnp->cn_flags & HASBUF);
+
+  	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
+
+	/* Disallow cross-device renames.
+	 * XXX Why isn't this done by the caller? */
+	if (fvp->v_mount != tdvp->v_mount ||
+	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
+		error = EXDEV;
+		goto out;
+	}
+
+	tdnode = VP_TO_TMPFS_DIR(tdvp);
+
+	/* If source and target are the same file, there is nothing to do. */
+	if (fvp == tvp) {
+		error = 0;
+		goto out;
+	}
+
+	/* If we need to move the entry between directories, lock the
+	 * source directory so that we can safely operate on it. */
+	if (tdvp != fdvp) {
+		error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, tcnp->cn_thread);
+		if (error != 0)
+			goto out;
+	}
+	fdnode = VP_TO_TMPFS_DIR(fdvp);
+	fnode = VP_TO_TMPFS_NODE(fvp);
+	de = tmpfs_dir_search(fdnode, fnode);
+
+	/* Avoid manipulating '.' and '..' entries. */
+	if (de == NULL) {
+		MPASS(fvp->v_type == VDIR);
+		error = EINVAL;
+		goto out_locked;
+	}
+	MPASS(de->td_node == fnode);
+
+	/* If re-naming a directory to another preexisting directory
+	 * ensure that the target directory is empty so that its
+	 * removal causes no side effects.
+	 * Kern_rename guarantees the destination to be a directory
+	 * if the source is one. */
+	if (tvp != NULL) {
+		MPASS(tnode != NULL);
+
+		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
+		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
+			error = EPERM;
+			goto out_locked;
+		}
+
+		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
+			if (tnode->tn_size > 0) {
+				error = ENOTEMPTY;
+				goto out_locked;
+			}
+		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
+			error = ENOTDIR;
+			goto out_locked;
+		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
+			error = EISDIR;
+			goto out_locked;
+		} else {
+			MPASS(fnode->tn_type != VDIR &&
+				tnode->tn_type != VDIR);
+		}
+	}
+
+	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
+	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
+		error = EPERM;
+		goto out_locked;
+	}
+
+	/* Ensure that we have enough memory to hold the new name, if it
+	 * has to be changed. */
+	if (fcnp->cn_namelen != tcnp->cn_namelen ||
+	    memcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
+		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
+	} else
+		newname = NULL;
+
+	/* If the node is being moved to another directory, we have to do
+	 * the move. */
+	if (fdnode != tdnode) {
+		/* In case we are moving a directory, we have to adjust its
+		 * parent to point to the new parent. */
+		if (de->td_node->tn_type == VDIR) {
+			struct tmpfs_node *n;
+
+			/* Ensure the target directory is not a child of the
+			 * directory being moved.  Otherwise, we'd end up
+			 * with stale nodes. */
+			n = tdnode;
+			while (n != n->tn_dir.tn_parent) {
+				if (n == fnode) {
+					error = EINVAL;
+					if (newname != NULL)
+						    free(newname, M_TMPFSNAME);
+					goto out_locked;
+				}
+				n = n->tn_dir.tn_parent;
+			}
+
+			/* Adjust the parent pointer. */
+			TMPFS_VALIDATE_DIR(fnode);
+			de->td_node->tn_dir.tn_parent = tdnode;
+
+			/* As a result of changing the target of the '..'
+			 * entry, the link count of the source and target
+			 * directories has to be adjusted. */
+			fdnode->tn_links--;
+			tdnode->tn_links++;
+		}
+
+		/* Do the move: just remove the entry from the source directory
+		 * and insert it into the target one. */
+		tmpfs_dir_detach(fdvp, de);
+		tmpfs_dir_attach(tdvp, de);
+	}
+
+	/* If the name has changed, we need to make it effective by changing
+	 * it in the directory entry. */
+	if (newname != NULL) {
+		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
+
+		free(de->td_name, M_TMPFSNAME);
+		de->td_namelen = (uint16_t)tcnp->cn_namelen;
+		memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
+		de->td_name = newname;
+
+		fnode->tn_status |= TMPFS_NODE_CHANGED;
+		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
+	}
+
+	/* If we are overwriting an entry, we have to remove the old one
+	 * from the target directory. */
+	if (tvp != NULL) {
+		/* Remove the old entry from the target directory. */
+		de = tmpfs_dir_search(tdnode, tnode);
+		tmpfs_dir_detach(tdvp, de);
+
+		/* Free the directory entry we just deleted.  Note that the
+		 * node referred by it will not be removed until the vnode is
+		 * really reclaimed. */
+		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
+	}
+
+	error = 0;
+
+out_locked:
+	if (fdnode != tdnode)
+		VOP_UNLOCK(fdvp, 0, tcnp->cn_thread);
+
+out:
+	/* Release target nodes. */
+	/* XXX: I don't understand when tdvp can be the same as tvp, but
+	 * other code takes care of this... */
+	if (tdvp == tvp)
+		vrele(tdvp);
+	else
+		vput(tdvp);
+	if (tvp != NULL)
+		vput(tvp);
+
+	/* Release source nodes. */
+	vrele(fdvp);
+	vrele(fvp);
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_mkdir(struct vop_mkdir_args *v)
+{
+	struct vnode *dvp = v->a_dvp;
+	struct vnode **vpp = v->a_vpp;
+	struct componentname *cnp = v->a_cnp;
+	struct vattr *vap = v->a_vap;
+
+	MPASS(vap->va_type == VDIR);
+
+	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_rmdir(struct vop_rmdir_args *v)
+{
+	struct vnode *dvp = v->a_dvp;
+	struct vnode *vp = v->a_vp;
+
+	int error;
+	struct tmpfs_dirent *de;
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *dnode;
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(dvp, v->a_cnp->cn_thread));
+	MPASS(VOP_ISLOCKED(vp, v->a_cnp->cn_thread));
+
+	tmp = VFS_TO_TMPFS(dvp->v_mount);
+	dnode = VP_TO_TMPFS_DIR(dvp);
+	node = VP_TO_TMPFS_DIR(vp);
+
+	/* Directories with more than two entries ('.' and '..') cannot be
+	 * removed. */
+	 if (node->tn_size > 0) {
+		 error = ENOTEMPTY;
+		 goto out;
+	 }
+
+	if ((dnode->tn_flags & APPEND)
+	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
+		error = EPERM;
+		goto out;
+	}
+
+	/* This invariant holds only if we are not trying to remove "..".
+	  * We checked for that above so this is safe now. */
+	MPASS(node->tn_dir.tn_parent == dnode);
+
+	/* Get the directory entry associated with node (vp).  This was
+	 * filled by tmpfs_lookup while looking up the entry. */
+	de = tmpfs_dir_search(dnode, node);
+	MPASS(TMPFS_DIRENT_MATCHES(de,
+	    v->a_cnp->cn_nameptr,
+	    v->a_cnp->cn_namelen));
+
+	/* Check flags to see if we are allowed to remove the directory. */
+	if (dnode->tn_flags & APPEND
+		|| node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
+		error = EPERM;
+		goto out;
+	}
+
+	/* Detach the directory entry from the directory (dnode). */
+	tmpfs_dir_detach(dvp, de);
+
+	node->tn_links--;
+	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
+	    TMPFS_NODE_MODIFIED;
+	node->tn_dir.tn_parent->tn_links--;
+	node->tn_dir.tn_parent->tn_status |= TMPFS_NODE_ACCESSED | \
+	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
+
+	cache_purge(dvp);
+	cache_purge(vp);
+
+	/* Free the directory entry we just deleted.  Note that the node
+	 * referred by it will not be removed until the vnode is really
+	 * reclaimed. */
+	tmpfs_free_dirent(tmp, de, TRUE);
+
+	/* Release the deleted vnode (will destroy the node, notify
+	 * interested parties and clean it from the cache). */
+
+	dnode->tn_status |= TMPFS_NODE_CHANGED;
+	tmpfs_update(dvp);
+
+	error = 0;
+
+out:
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_symlink(struct vop_symlink_args *v)
+{
+	struct vnode *dvp = v->a_dvp;
+	struct vnode **vpp = v->a_vpp;
+	struct componentname *cnp = v->a_cnp;
+	struct vattr *vap = v->a_vap;
+	char *target = v->a_target;
+
+#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
+	MPASS(vap->va_type == VLNK);
+#else
+	vap->va_type = VLNK;
+#endif
+
+	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_readdir(struct vop_readdir_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	struct uio *uio = v->a_uio;
+	int *eofflag = v->a_eofflag;
+	u_long **cookies = v->a_cookies;
+	int *ncookies = v->a_ncookies;
+
+	int error;
+	off_t startoff;
+	off_t cnt = 0;
+	struct tmpfs_node *node;
+
+	/* This operation only makes sense on directory nodes. */
+	if (vp->v_type != VDIR)
+		return ENOTDIR;
+
+	node = VP_TO_TMPFS_DIR(vp);
+
+	startoff = uio->uio_offset;
+
+	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
+		error = tmpfs_dir_getdotdent(node, uio);
+		if (error != 0)
+			goto outok;
+		cnt++;
+	}
+
+	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
+		error = tmpfs_dir_getdotdotdent(node, uio);
+		if (error != 0)
+			goto outok;
+		cnt++;
+	}
+
+	error = tmpfs_dir_getdents(node, uio, &cnt);
+
+outok:
+	MPASS(error >= -1);
+
+	if (error == -1)	/* NOTE(review): presumably "uio full"; confirm */
+		error = 0;
+
+	if (eofflag != NULL)
+		*eofflag =
+		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
+
+	/* Hand back one cookie per entry returned, for NFS's benefit. */
+	if (error == 0 && cookies != NULL && ncookies != NULL) {
+		off_t i;
+		off_t off = startoff;
+		struct tmpfs_dirent *de = NULL;
+
+		*ncookies = cnt;
+		*cookies = malloc(cnt * sizeof(**cookies), M_TEMP, M_WAITOK);
+
+		for (i = 0; i < cnt; i++) {
+			MPASS(off != TMPFS_DIRCOOKIE_EOF);
+			if (off == TMPFS_DIRCOOKIE_DOT) {
+				off = TMPFS_DIRCOOKIE_DOTDOT;
+			} else {
+				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
+					de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
+				} else if (de != NULL) {
+					de = TAILQ_NEXT(de, td_entries);
+				} else {
+					de = tmpfs_dir_lookupbycookie(node,
+					    off);
+					MPASS(de != NULL);
+					de = TAILQ_NEXT(de, td_entries);
+				}
+				if (de == NULL)
+					off = TMPFS_DIRCOOKIE_EOF;
+				else
+					off = tmpfs_dircookie(de);
+			}
+
+			(*cookies)[i] = off;
+		}
+		MPASS(uio->uio_offset == off);
+	}
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_readlink(struct vop_readlink_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	struct uio *uio = v->a_uio;
+
+	int error;
+	struct tmpfs_node *node;
+
+	MPASS(uio->uio_offset == 0);
+	MPASS(vp->v_type == VLNK);
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
+	    uio);
+	node->tn_status |= TMPFS_NODE_ACCESSED;
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_inactive(struct vop_inactive_args *v)
+{
+	struct vnode *vp = v->a_vp;
+	struct thread *l = v->a_td;
+
+	struct tmpfs_node *node;
+
+	MPASS(VOP_ISLOCKED(vp, l));
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	if (node->tn_links == 0)
+		vrecycle(vp, l);
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+int
+tmpfs_reclaim(struct vop_reclaim_args *v)
+{
+	struct vnode *vp = v->a_vp;
+
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *node;
+
+	node = VP_TO_TMPFS_NODE(vp);
+	tmp = VFS_TO_TMPFS(vp->v_mount);
+
+	vnode_destroy_vobject(vp);
+	cache_purge(vp);
+	tmpfs_free_vp(vp);
+
+	/* If the node referenced by this vnode was deleted by the user,
+	 * we must free its associated data structures (now that the vnode
+	 * is being reclaimed). */
+	if (node->tn_links == 0)
+		tmpfs_free_node(tmp, node);
+
+	MPASS(vp->v_data == NULL);
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_print(struct vop_print_args *v)
+{
+	struct vnode *vp = v->a_vp;
+
+	struct tmpfs_node *node;
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
+	    node, node->tn_flags, node->tn_links);
+	printf("\tmode 0%o, owner %d, group %d, size %" PRIdMAX
+	    ", status 0x%x\n",
+	    node->tn_mode, node->tn_uid, node->tn_gid,
+	    (intmax_t)node->tn_size, node->tn_status); /* matches PRIdMAX */
+
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+
+	printf("\n");
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_pathconf(struct vop_pathconf_args *v)
+{
+	int name = v->a_name;
+	register_t *retval = v->a_retval;
+
+	int error;
+
+	error = 0;
+
+	switch (name) {
+	case _PC_LINK_MAX:
+		*retval = LINK_MAX;
+		break;
+
+	case _PC_NAME_MAX:
+		*retval = NAME_MAX;
+		break;
+
+	case _PC_PATH_MAX:
+		*retval = PATH_MAX;
+		break;
+
+	case _PC_PIPE_BUF:
+		*retval = PIPE_BUF;
+		break;
+
+	case _PC_CHOWN_RESTRICTED:
+		*retval = 1;
+		break;
+
+	case _PC_NO_TRUNC:
+		*retval = 1;
+		break;
+
+	case _PC_SYNC_IO:
+		*retval = 1;
+		break;
+
+	case _PC_FILESIZEBITS:
+		*retval = 64; /* File sizes are kept in an off_t (tn_size). */
+		break;
+
+	default:
+		error = EINVAL;
+	}
+
+	return error;
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_advlock(struct vop_advlock_args *v)
+{
+	struct vnode *vp = v->a_vp;
+
+	struct tmpfs_node *node;
+
+	node = VP_TO_TMPFS_NODE(vp);
+
+	return lf_advlock(v, &node->tn_lockf, node->tn_size);
+}
+
+/* --------------------------------------------------------------------- */
+
+static int
+tmpfs_vptofh(struct vop_vptofh_args *ap)
+{
+	struct tmpfs_fid *tfhp;
+	struct tmpfs_node *node;
+
+	tfhp = (struct tmpfs_fid *)ap->a_fhp;
+	node = VP_TO_TMPFS_NODE(ap->a_vp);
+
+	tfhp->tf_len = sizeof(struct tmpfs_fid);
+	tfhp->tf_id = node->tn_id;
+	tfhp->tf_gen = node->tn_gen;
+
+	return (0);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * vnode operations vector used for files stored in a tmpfs file system.
+ */
+struct vop_vector tmpfs_vnodeop_entries = {
+	.vop_default =			&default_vnodeops,
+	.vop_lookup =			vfs_cache_lookup,
+	.vop_cachedlookup =		tmpfs_lookup,
+	.vop_create =			tmpfs_create,
+	.vop_mknod =			tmpfs_mknod,
+	.vop_open =			tmpfs_open,
+	.vop_close =			tmpfs_close,
+	.vop_access =			tmpfs_access,
+	.vop_getattr =			tmpfs_getattr,
+	.vop_setattr =			tmpfs_setattr,
+	.vop_read =			tmpfs_read,
+	.vop_write =			tmpfs_write,
+	.vop_fsync =			tmpfs_fsync,
+	.vop_remove =			tmpfs_remove,
+	.vop_link =			tmpfs_link,
+	.vop_rename =			tmpfs_rename,
+	.vop_mkdir =			tmpfs_mkdir,
+	.vop_rmdir =			tmpfs_rmdir,
+	.vop_symlink =			tmpfs_symlink,
+	.vop_readdir =			tmpfs_readdir,
+	.vop_readlink =			tmpfs_readlink,
+	.vop_inactive =			tmpfs_inactive,
+	.vop_reclaim =			tmpfs_reclaim,
+	.vop_print =			tmpfs_print,
+	.vop_pathconf =			tmpfs_pathconf,
+	.vop_advlock =			tmpfs_advlock,
+	.vop_vptofh =			tmpfs_vptofh,
+	.vop_bmap =			VOP_EOPNOTSUPP,
+};
+
--- /dev/null
+++ sys/fs/tmpfs/tmpfs.h
@@ -0,0 +1,579 @@
+/*	$NetBSD: tmpfs.h,v 1.26 2007/02/22 06:37:00 thorpej Exp $	*/
+
+/*
+ * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
+ * 2005 program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/fs/tmpfs/tmpfs.h,v 1.11.2.1 2007/11/26 06:46:44 delphij Exp $
+ */
+
+#ifndef _FS_TMPFS_TMPFS_H_
+#define _FS_TMPFS_TMPFS_H_
+
+/* ---------------------------------------------------------------------
+ * KERNEL-SPECIFIC DEFINITIONS
+ * --------------------------------------------------------------------- */
+#include <sys/dirent.h>
+#include <sys/mount.h>
+#include <sys/queue.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+/* --------------------------------------------------------------------- */
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/vmmeter.h>
+#include <vm/swap_pager.h>
+
+MALLOC_DECLARE(M_TMPFSMNT);
+MALLOC_DECLARE(M_TMPFSNAME);
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Internal representation of a tmpfs directory entry.
+ */
+struct tmpfs_dirent {
+	TAILQ_ENTRY(tmpfs_dirent)	td_entries;
+
+	/* Length of the name stored in this directory entry.  This avoids
+	 * the need to recalculate it every time the name is used. */
+	uint16_t			td_namelen;
+
+	/* The name of the entry, allocated from a string pool.  This
+	* string is not required to be zero-terminated; therefore, the
+	* td_namelen field must always be used when accessing its value. */
+	char *				td_name;
+
+	/* Pointer to the node this entry refers to. */
+	struct tmpfs_node *		td_node;
+};
+
+/* A directory in tmpfs holds a sorted list of directory entries, which in
+ * turn point to other files (which can be directories themselves).
+ *
+ * In tmpfs, this list is managed by a tail queue, whose head is defined by
+ * the struct tmpfs_dir type.
+ *
+ * It is important to notice that directories do not have entries for . and
+ * .. as other file systems do.  These can be generated when requested
+ * based on information available by other means, such as the pointer to
+ * the node itself in the former case or the pointer to the parent directory
+ * in the latter case.  This is done to simplify tmpfs's code and, more
+ * importantly, to remove redundancy. */
+TAILQ_HEAD(tmpfs_dir, tmpfs_dirent);
+
+/* Each entry in a directory has a cookie that identifies it.  Cookies
+ * supersede offsets within directories because, given how tmpfs stores
+ * directories in memory, there is no such thing as an offset.  (Emulating
+ * a real offset could be very difficult.)
+ * 
+ * The '.', '..' and the end of directory markers have fixed cookies which
+ * cannot collide with the cookies generated by other entries.  The cookies
+ * for the other entries are generated based on the memory address at which
+ * their information is stored.
+ *
+ * Ideally, using the entry's memory pointer as the cookie would be enough
+ * to represent it and it wouldn't cause collisions in any system.
+ * Unfortunately, this results in "offsets" with very large values which
+ * later raise problems in the Linux compatibility layer (and maybe in other
+ * places) as described in PR kern/32034.  Hence we need to workaround this
+ * with a rather ugly hack.
+ *
+ * Linux 32-bit binaries, unless built with _FILE_OFFSET_BITS=64, have off_t
+ * set to 'long', which is a 32-bit *signed* long integer.  Regardless of
+ * the macro value, GLIBC (2.3 at least) always uses the getdents64
+ * system call (when calling readdir) which internally returns off64_t
+ * offsets.  In order to make 32-bit binaries work, *GLIBC* converts the
+ * 64-bit values returned by the kernel to 32-bit ones and aborts with
+ * EOVERFLOW if the conversion results in values that won't fit in 32-bit
+ * integers (which it assumes is because the directory is extremely large).
+ * This wouldn't cause problems if we were dealing with unsigned integers,
+ * but as we have signed integers, this check fails due to sign extension.
+ *
+ * For example, consider that the kernel returns the 0xc1234567 cookie to
+ * userspace in a off64_t integer.  Later on, GLIBC casts this value to
+ * off_t (remember, signed) with code similar to:
+ *     system call returns the offset in kernel_value;
+ *     off_t casted_value = kernel_value;
+ *     if (sizeof(off_t) != sizeof(off64_t) &&
+ *         kernel_value != casted_value)
+ *             error!
+ * In this case, casted_value still has 0xc1234567, but when it is compared
+ * for equality against kernel_value, it is promoted to a 64-bit integer and
+ * becomes 0xffffffffc1234567, which is different than 0x00000000c1234567.
+ * Then, GLIBC assumes this is because the directory is very large.
+ *
+ * Given that all the above happens in user-space, we have no control over
+ * it; therefore we must workaround the issue here.  We do this by
+ * truncating the pointer value to a 32-bit integer and hope that there
+ * won't be collisions.  In fact, this will not cause any problems in
+ * 32-bit platforms but some might arise in 64-bit machines (I'm not sure
+ * if they can happen at all in practice).
+ *
+ * XXX A nicer solution shall be attempted. */
+#ifdef _KERNEL
+#define	TMPFS_DIRCOOKIE_DOT	0
+#define	TMPFS_DIRCOOKIE_DOTDOT	1
+#define	TMPFS_DIRCOOKIE_EOF	2
+static __inline
+off_t
+tmpfs_dircookie(struct tmpfs_dirent *de)
+{
+	off_t cookie;
+
+	cookie = ((off_t)(uintptr_t)de >> 1) & 0x7FFFFFFF;
+	MPASS(cookie != TMPFS_DIRCOOKIE_DOT);
+	MPASS(cookie != TMPFS_DIRCOOKIE_DOTDOT);
+	MPASS(cookie != TMPFS_DIRCOOKIE_EOF);
+
+	return cookie;
+}
+#endif
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Internal representation of a tmpfs file system node.
+ *
+ * This structure is split in two parts: one holds attributes common
+ * to all file types and the other holds data that is only applicable to
+ * a particular type.  The code must be careful to only access those
+ * attributes that are actually allowed by the node's type.
+ *
+ *
+ * Below is the key of locks used to protect the fields in the following
+ * structures.
+ *
+ */
+struct tmpfs_node {
+	/* Doubly-linked list entry which links all existing nodes for a
+	 * single file system.  This is provided to ease the removal of
+	 * all nodes during the unmount operation. */
+	LIST_ENTRY(tmpfs_node)	tn_entries;
+
+	/* The node's type.  Any of 'VBLK', 'VCHR', 'VDIR', 'VFIFO',
+	 * 'VLNK', 'VREG' and 'VSOCK' is allowed.  The usage of vnode
+	 * types instead of a custom enumeration is to make things simpler
+	 * and faster, as we do not need to convert between two types. */
+	enum vtype		tn_type;
+
+	/* Node identifier. */
+	ino_t			tn_id;
+
+	/* Node's internal status.  This is used by several file system
+	 * operations to do modifications to the node in a delayed
+	 * fashion. */
+	int			tn_status;
+#define	TMPFS_NODE_ACCESSED	(1 << 1)
+#define	TMPFS_NODE_MODIFIED	(1 << 2)
+#define	TMPFS_NODE_CHANGED	(1 << 3)
+
+	/* The node size.  It does not necessarily match the real amount
+	 * of memory consumed by it. */
+	off_t			tn_size;
+
+	/* Generic node attributes. */
+	uid_t			tn_uid;
+	gid_t			tn_gid;
+	mode_t			tn_mode;
+	int			tn_flags;
+	nlink_t			tn_links;
+	struct timespec		tn_atime;
+	struct timespec		tn_mtime;
+	struct timespec		tn_ctime;
+	struct timespec		tn_birthtime;
+	unsigned long		tn_gen;
+
+	/* Head of byte-level lock list (used by tmpfs_advlock). */
+	struct lockf *		tn_lockf;
+
+	/* As there is a single vnode for each active file within the
+	 * system, care has to be taken to avoid allocating more than one
+	 * vnode per file.  In order to do this, a bidirectional association
+	 * is kept between vnodes and nodes.
+	 *
+	 * Whenever a vnode is allocated, its v_data field is updated to
+	 * point to the node it references.  At the same time, the node's
+	 * tn_vnode field is modified to point to the new vnode representing
+	 * it.  Further attempts to allocate a vnode for this same node will
+	 * result in returning a new reference to the value stored in
+	 * tn_vnode.
+	 *
+	 * May be NULL when the node is unused (that is, no vnode has been
+	 * allocated for it or it has been reclaimed). */
+	struct vnode *		tn_vnode;
+
+	/* interlock to protect tn_vpstate */
+	struct mtx	tn_interlock;
+
+	/* Identifies whether the node has a vnode associated with it
+	 * or whether one is currently being allocated.
+	 */
+	int		tn_vpstate;
+
+	/* misc data field for different tn_type node */
+	union {
+		/* Valid when tn_type == VBLK || tn_type == VCHR. */
+		dev_t			tn_rdev;
+
+		/* Valid when tn_type == VDIR. */
+		struct tn_dir{
+			/* Pointer to the parent directory.  The root
+			 * directory has a pointer to itself in this field;
+			 * this property identifies the root node. */
+			struct tmpfs_node *	tn_parent;
+
+			/* Head of a tail-queue that links the contents of
+			 * the directory together.  See above for a
+			 * description of its contents. */
+			struct tmpfs_dir	tn_dirhead;
+
+			/* Number and pointer of the first directory entry
+			 * returned by the readdir operation if it were
+			 * called again to continue reading data from the
+			 * same directory as before.  This is used to speed
+			 * up reads of long directories, assuming that no
+			 * more than one read is in progress at a given time.
+			 * Otherwise, these values are discarded and a linear
+			 * scan is performed from the beginning up to the
+			 * point where readdir starts returning values. */
+			off_t			tn_readdir_lastn;
+			struct tmpfs_dirent *	tn_readdir_lastp;
+		}tn_dir;
+
+		/* Valid when tn_type == VLNK. */
+		/* The link's target, allocated from a string pool. */
+		char *			tn_link;
+
+		/* Valid when tn_type == VREG. */
+		struct tn_reg {
+			/* The contents of regular files stored in a tmpfs
+			 * file system are represented by a single anonymous
+			 * memory object (aobj, for short).  The aobj provides
+			 * direct access to any position within the file,
+			 * because its contents are always mapped in a
+			 * contiguous region of virtual memory.  It is a task
+			 * of the memory management subsystem (see uvm(9)) to
+			 * issue the required page ins or page outs whenever
+			 * a position within the file is accessed. */
+			vm_object_t		tn_aobj;
+			size_t			tn_aobj_pages;
+
+		}tn_reg;
+
+		/* Valid when tn_type == VFIFO. */
+		struct tn_fifo {
+			fo_rdwr_t		*tn_fo_read;
+			fo_rdwr_t		*tn_fo_write;
+		}tn_fifo;
+	}tn_spec;
+};
+LIST_HEAD(tmpfs_node_list, tmpfs_node);
+
+#define tn_rdev tn_spec.tn_rdev
+#define tn_dir tn_spec.tn_dir
+#define tn_link tn_spec.tn_link
+#define tn_reg tn_spec.tn_reg
+#define tn_fifo tn_spec.tn_fifo
+
+#define TMPFS_NODE_LOCK(node) mtx_lock(&(node)->tn_interlock)
+#define TMPFS_NODE_UNLOCK(node) mtx_unlock(&(node)->tn_interlock)
+#define        TMPFS_NODE_MTX(node) (&(node)->tn_interlock)
+
+#define TMPFS_VNODE_ALLOCATING	1
+#define TMPFS_VNODE_WANT	2
+/* --------------------------------------------------------------------- */
+
+/*
+ * Internal representation of a tmpfs mount point.
+ */
+struct tmpfs_mount {
+	/* Maximum number of memory pages available for use by the file
+	 * system, set during mount time.  This variable must never be
+	 * used directly as it may be bigger than the current amount of
+	 * free memory; in the extreme case, it will hold the SIZE_MAX
+	 * value.  Instead, use the TMPFS_PAGES_MAX macro. */
+	size_t			tm_pages_max;
+
+	/* Number of pages in use by the file system.  Cannot be bigger
+	 * than the value returned by TMPFS_PAGES_MAX in any case. */
+	size_t			tm_pages_used;
+
+	/* Pointer to the node representing the root directory of this
+	 * file system. */
+	struct tmpfs_node *	tm_root;
+
+	/* Maximum number of possible nodes for this file system; set
+	 * during mount time.  We need a hard limit on the maximum number
+	 * of nodes to avoid allocating too much of them; their objects
+	 * cannot be released until the file system is unmounted.
+	 * Otherwise, we could easily run out of memory by creating lots
+	 * of empty files and then simply removing them. */
+	ino_t			tm_nodes_max;
+
+	/* unrhdr used to allocate inode numbers */
+	struct unrhdr *		tm_ino_unr;
+
+	/* Number of nodes currently that are in use. */
+	ino_t			tm_nodes_inuse;
+
+	/* maximum representable file size */
+	u_int64_t		tm_maxfilesize;
+
+	/* Nodes are organized in two different lists.  The used list
+	 * contains all nodes that are currently used by the file system;
+	 * i.e., they refer to existing files.  The available list contains
+	 * all nodes that are currently available for use by new files.
+	 * Nodes must be kept in this list (instead of deleting them)
+	 * because we need to keep track of their generation number (tn_gen
+	 * field).
+	 *
+	 * Note that nodes are lazily allocated: if the available list is
+	 * empty and we have enough space to create more nodes, they will be
+	 * created and inserted in the used list.  Once these are released,
+	 * they will go into the available list, remaining alive until the
+	 * file system is unmounted. */
+	struct tmpfs_node_list	tm_nodes_used;
+
+	/* All-node lock; protects the node list and tm_pages_used. */
+	struct mtx allnode_lock;
+
+	/* Pools used to store file system meta data.  These are not shared
+	 * across several instances of tmpfs for the reasons described in
+	 * tmpfs_pool.c. */
+	uma_zone_t		tm_dirent_pool;
+	uma_zone_t		tm_node_pool;
+};
+#define TMPFS_LOCK(tm) mtx_lock(&(tm)->allnode_lock)
+#define TMPFS_UNLOCK(tm) mtx_unlock(&(tm)->allnode_lock)
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * This structure maps a file identifier to a tmpfs node.  Used by the
+ * NFS code.
+ */
+struct tmpfs_fid {
+	uint16_t		tf_len;
+	uint16_t		tf_pad;
+	ino_t			tf_id;
+	unsigned long		tf_gen;
+};
+
+/* --------------------------------------------------------------------- */
+
+#ifdef _KERNEL
+/*
+ * Prototypes for tmpfs_subr.c.
+ */
+
+int	tmpfs_alloc_node(struct tmpfs_mount *, enum vtype,
+	    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *,
+	    char *, dev_t, struct thread *, struct tmpfs_node **);
+void	tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *);
+int	tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *,
+	    const char *, uint16_t, struct tmpfs_dirent **);
+void	tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *,
+	    boolean_t);
+int	tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, int,
+	    struct vnode **, struct thread *);
+void	tmpfs_free_vp(struct vnode *);
+int	tmpfs_alloc_file(struct vnode *, struct vnode **, struct vattr *,
+	    struct componentname *, char *);
+void	tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *);
+void	tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *);
+struct tmpfs_dirent *	tmpfs_dir_lookup(struct tmpfs_node *node,
+			    struct componentname *cnp);
+struct tmpfs_dirent *tmpfs_dir_search(struct tmpfs_node *node,
+    struct tmpfs_node *f);
+int	tmpfs_dir_getdotdent(struct tmpfs_node *, struct uio *);
+int	tmpfs_dir_getdotdotdent(struct tmpfs_node *, struct uio *);
+struct tmpfs_dirent *	tmpfs_dir_lookupbycookie(struct tmpfs_node *, off_t);
+int	tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, off_t *);
+int	tmpfs_reg_resize(struct vnode *, off_t);
+int	tmpfs_chflags(struct vnode *, int, struct ucred *, struct thread *);
+int	tmpfs_chmod(struct vnode *, mode_t, struct ucred *, struct thread *);
+int	tmpfs_chown(struct vnode *, uid_t, gid_t, struct ucred *,
+	    struct thread *);
+int	tmpfs_chsize(struct vnode *, u_quad_t, struct ucred *, struct thread *);
+int	tmpfs_chtimes(struct vnode *, struct timespec *, struct timespec *,
+	    struct timespec *, int, struct ucred *, struct thread *);
+void	tmpfs_itimes(struct vnode *, const struct timespec *,
+	    const struct timespec *);
+
+void	tmpfs_update(struct vnode *);
+int	tmpfs_truncate(struct vnode *, off_t);
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Convenience macros to simplify some logical expressions.
+ */
+#define IMPLIES(a, b) (!(a) || (b))
+#define IFF(a, b) (IMPLIES(a, b) && IMPLIES(b, a))
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Checks that the directory entry pointed by 'de' matches the name 'name'
+ * with a length of 'len'.  'de' is evaluated more than once.
+ */
+#define TMPFS_DIRENT_MATCHES(de, name, len) \
+    ((de)->td_namelen == (uint16_t)(len) && \
+    memcmp((de)->td_name, (name), (de)->td_namelen) == 0)
+
+/* --------------------------------------------------------------------- */
+
+/* Ensures that the node pointed by 'node' is a directory and that its
+ * contents are consistent with respect to directories.  Expands to a
+ * single statement; callers supply the trailing semicolon. */
+#define TMPFS_VALIDATE_DIR(node) do { \
+    MPASS((node)->tn_type == VDIR); \
+    MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
+    MPASS((node)->tn_dir.tn_readdir_lastp == NULL || \
+	tmpfs_dircookie((node)->tn_dir.tn_readdir_lastp) == (node)->tn_dir.tn_readdir_lastn); \
+} while (0)
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Memory management stuff.
+ */
+
+/* Amount of memory pages to reserve for the system (e.g., to not use by
+ * tmpfs).
+ * XXX: Should this be tunable through sysctl, for instance? */
+#define TMPFS_PAGES_RESERVED (4 * 1024 * 1024 / PAGE_SIZE)
+
+/*
+ * Returns an estimate of the number of memory pages currently available,
+ * including physical and virtual ones.
+ *
+ * The value is swap_pager_avail plus the free and inactive page counts,
+ * minus the wired page count; the subtraction is clamped so that the
+ * result never goes below zero.
+ *
+ * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
+ * excessive memory usage.
+ *
+ */
+static __inline size_t
+tmpfs_mem_info(void)
+{
+	size_t size;
+
+	size = swap_pager_avail + cnt.v_free_count + cnt.v_inactive_count;
+	size -= size > cnt.v_wire_count ? cnt.v_wire_count : size;
+	return size;
+}
+
+/* Returns the maximum size allowed for a tmpfs file system.  This macro
+ * must be used instead of directly retrieving the value from tm_pages_max.
+ * The reason is that the size of a tmpfs file system is dynamic: it lets
+ * the user store files as long as there is enough free memory (including
+ * physical memory and swap space).  Therefore, the amount of memory to be
+ * used is either the limit imposed by the user during mount time or the
+ * amount of available memory, whichever is lower.  To avoid consuming all
+ * the memory for a given mount point, the system will always reserve a
+ * minimum of TMPFS_PAGES_RESERVED pages, which is also taken into account
+ * by this macro (see above). */
+static __inline size_t
+TMPFS_PAGES_MAX(struct tmpfs_mount *tmp)
+{
+	size_t freepages;
+
+	freepages = tmpfs_mem_info();
+	freepages -= freepages < TMPFS_PAGES_RESERVED ?
+	    freepages : TMPFS_PAGES_RESERVED;
+
+	return MIN(tmp->tm_pages_max, freepages + tmp->tm_pages_used);
+}
+
+/* Returns the available space for the given file system. */
+#define TMPFS_META_PAGES(tmp) (howmany((tmp)->tm_nodes_inuse * (sizeof(struct tmpfs_node) \
+				+ sizeof(struct tmpfs_dirent)), PAGE_SIZE))
+#define TMPFS_FILE_PAGES(tmp) ((tmp)->tm_pages_used)
+
+#define TMPFS_PAGES_AVAIL(tmp) (TMPFS_PAGES_MAX(tmp) > \
+			TMPFS_META_PAGES(tmp)+TMPFS_FILE_PAGES(tmp)? \
+			TMPFS_PAGES_MAX(tmp) - TMPFS_META_PAGES(tmp) \
+			- TMPFS_FILE_PAGES(tmp):0)
+
+#endif
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Macros/functions to convert from generic data structures to tmpfs
+ * specific ones.
+ */
+
+static inline
+struct tmpfs_mount *
+VFS_TO_TMPFS(struct mount *mp)
+{
+	struct tmpfs_mount *tmp;
+
+	MPASS((mp) != NULL && (mp)->mnt_data != NULL);
+	tmp = (struct tmpfs_mount *)(mp)->mnt_data;
+	return tmp;
+}
+
+static inline
+struct tmpfs_node *
+VP_TO_TMPFS_NODE(struct vnode *vp)
+{
+	struct tmpfs_node *node;
+
+	MPASS((vp) != NULL && (vp)->v_data != NULL);
+	node = (struct tmpfs_node *)vp->v_data;
+	return node;
+}
+
+static inline
+struct tmpfs_node *
+VP_TO_TMPFS_DIR(struct vnode *vp)
+{
+	struct tmpfs_node *node;
+
+	node = VP_TO_TMPFS_NODE(vp);
+	TMPFS_VALIDATE_DIR(node);
+	return node;
+}
+
+#endif /* _FS_TMPFS_TMPFS_H_ */
--- /dev/null
+++ sys/fs/tmpfs/tmpfs_vfsops.c
@@ -0,0 +1,473 @@
+/*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/
+
+/*
+ * Copyright (c) 2005 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
+ * 2005 program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Efficient memory file system.
+ *
+ * tmpfs is a file system that uses NetBSD's virtual memory sub-system
+ * (the well-known UVM) to store file data and metadata in an efficient
+ * way.  This means that it does not follow the structure of an on-disk
+ * file system because it simply does not need to.  Instead, it uses
+ * memory-specific data structures and algorithms to automatically
+ * allocate and release resources.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/tmpfs/tmpfs_vfsops.c,v 1.11.2.3 2007/11/26 06:46:44 delphij Exp $");
+
+#include <sys/param.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_param.h>
+
+#include <fs/tmpfs/tmpfs.h>
+
+/*
+ * Default permission for root node
+ */
+#define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
+
+MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
+MALLOC_DEFINE(M_TMPFSNAME, "tmpfs name", "tmpfs file names");
+
+/* --------------------------------------------------------------------- */
+
+static int	tmpfs_mount(struct mount *, struct thread *);
+static int	tmpfs_unmount(struct mount *, int, struct thread *);
+static int	tmpfs_root(struct mount *, int flags, struct vnode **,
+		    struct thread *);
+static int	tmpfs_fhtovp(struct mount *, struct fid *, struct vnode **);
+static int	tmpfs_statfs(struct mount *, struct statfs *, struct thread *);
+
+/* --------------------------------------------------------------------- */
+
+static const char *tmpfs_opts[] = {
+	"from", "size", "inodes", "uid", "gid", "mode", "export",
+	NULL
+};
+
+/* --------------------------------------------------------------------- */
+
+#define SWI_MAXMIB	3	/* MIB length of vm.swap_info plus a device index */
+/* Sums (xsw_nblks - dmmax) over all swap devices, as reported by sysctl. */
+static u_int
+get_swpgtotal(void)
+{
+	struct xswdev xsd;
+	char *sname = "vm.swap_info";
+	int soid[SWI_MAXMIB], oid[2];
+	u_int unswdev, total, dmmax, nswapdev;
+	size_t mibi, len;
+
+	total = 0;
+	/* Any sysctl failure below returns the total accumulated so far. */
+	len = sizeof(dmmax);
+	if (kernel_sysctlbyname(curthread, "vm.dmmax", &dmmax, &len,
+				NULL, 0, NULL, 0) != 0)
+		return total;
+
+	len = sizeof(nswapdev);
+	if (kernel_sysctlbyname(curthread, "vm.nswapdev",
+				&nswapdev, &len,
+				NULL, 0, NULL, 0) != 0)
+		return total;
+	/* Resolve sname into soid[]; oid {0, 3} is presumably name-to-OID. */
+	mibi = (SWI_MAXMIB - 1) * sizeof(int);
+	oid[0] = 0;
+	oid[1] = 3;
+
+	if (kernel_sysctl(curthread, oid, 2,
+			soid, &mibi, (void *)sname, strlen(sname),
+			NULL, 0) != 0)
+		return total;
+	/* From here on mibi is the index of the last MIB slot (the device). */
+	mibi = (SWI_MAXMIB - 1);
+	for (unswdev = 0; unswdev < nswapdev; ++unswdev) {
+		soid[mibi] = unswdev;
+		len = sizeof(struct xswdev);
+		if (kernel_sysctl(curthread,
+				soid, mibi + 1, &xsd, &len, NULL, 0,
+				NULL, 0) != 0)
+			return total;
+		if (len == sizeof(struct xswdev))
+			total += (xsd.xsw_nblks - dmmax);
+	}
+
+	/* Reached once every swap device has been queried. */
+	return total;
+}
+
+/* --------------------------------------------------------------------- */
+/* Node zone constructor: advance tn_gen and reset the per-use fields. */
+static int
+tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
+{
+	struct tmpfs_node *tn = mem;
+
+	tn->tn_gen++;
+	tn->tn_size = 0;
+	tn->tn_links = 0;
+	tn->tn_status = 0;
+	tn->tn_flags = 0;
+	tn->tn_vpstate = 0;
+	tn->tn_vnode = NULL;
+	tn->tn_lockf = NULL;
+	return (0);
+}
+
+/* Node zone destructor: tags the released item with the VNON type. */
+static void
+tmpfs_node_dtor(void *mem, int size, void *arg)
+{
+	((struct tmpfs_node *)mem)->tn_type = VNON;
+}
+
+/* Node zone init: clear tn_id, seed tn_gen, set up the node interlock. */
+static int
+tmpfs_node_init(void *mem, int size, int flags)
+{
+	struct tmpfs_node *tn = mem;
+
+	tn->tn_id = 0;
+	tn->tn_gen = arc4random();	/* random starting generation */
+	mtx_init(&tn->tn_interlock, "tmpfs node interlock", NULL, MTX_DEF);
+	return (0);
+}
+
+/* Node zone fini: destroys the interlock set up by tmpfs_node_init(). */
+static void
+tmpfs_node_fini(void *mem, int size)
+{
+
+	mtx_destroy(&((struct tmpfs_node *)mem)->tn_interlock);
+}
+
+/* Mount a new tmpfs: parse the options, derive the page and node limits,
+ * then create the tmpfs_mount structure, its UMA zones and the root
+ * directory node.  Returns 0 or an errno; MNT_UPDATE is not supported. */
+static int
+tmpfs_mount(struct mount *mp, struct thread *td)
+{
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *root;
+	struct vattr va;
+	size_t pages, mem_size;
+	ino_t nodes;
+	int error;
+	/* Size counters. */
+	ino_t	nodes_max;
+	off_t	size_max;
+	/* Root node attributes. */
+	uid_t	root_uid;
+	gid_t	root_gid;
+	mode_t	root_mode;
+
+	if (vfs_filteropt(mp->mnt_optnew, tmpfs_opts))
+		return (EINVAL);
+
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/* XXX: There is no support yet to update file system
+		 * settings.  Should be added. */
+		return EOPNOTSUPP;
+	}
+
+	printf("WARNING: TMPFS is considered to be a highly experimental "
+	    "feature in FreeBSD.\n");
+
+	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY, td);
+	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred, td);
+	VOP_UNLOCK(mp->mnt_vnodecovered, 0, td);
+	if (error)
+		return (error);
+	/* Non-root mounts inherit uid/gid/mode from the covered vnode. */
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
+		root_gid = va.va_gid;
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
+		root_uid = va.va_uid;
+	if (mp->mnt_cred->cr_ruid != 0 ||
+	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
+		root_mode = va.va_mode;
+	if (vfs_scanopt(mp->mnt_optnew, "inodes", "%d", &nodes_max) != 1)
+		nodes_max = 0;
+	if (vfs_scanopt(mp->mnt_optnew, "size", "%qu", &size_max) != 1)
+		size_max = 0;
+
+	/* Do not allow mounts if we do not have enough memory to preserve
+	 * the minimum reserved pages. */
+	mem_size = cnt.v_free_count + cnt.v_inactive_count + get_swpgtotal();
+	mem_size -= mem_size > cnt.v_wire_count ? cnt.v_wire_count : mem_size;
+	if (mem_size < TMPFS_PAGES_RESERVED)
+		return ENOSPC;
+
+	/* Get the maximum number of memory pages this file system is
+	 * allowed to use, based on the maximum size the user passed in
+	 * the mount structure.  A value of zero is treated as if the
+	 * maximum available space was requested. */
+	if (size_max < PAGE_SIZE || size_max >= SIZE_MAX)
+		pages = SIZE_MAX;
+	else
+		pages = howmany(size_max, PAGE_SIZE);
+	MPASS(pages > 0);
+
+	if (nodes_max <= 3)
+		nodes = 3 + pages * PAGE_SIZE / 1024;
+	else
+		nodes = nodes_max;
+	MPASS(nodes >= 3);
+
+	/* Allocate the tmpfs mount structure and fill it. */
+	tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount),
+	    M_TMPFSMNT, M_WAITOK | M_ZERO);
+
+	mtx_init(&tmp->allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF);
+	tmp->tm_nodes_max = nodes;
+	tmp->tm_nodes_inuse = 0;
+	tmp->tm_maxfilesize = (u_int64_t)(cnt.v_page_count + get_swpgtotal()) * PAGE_SIZE;
+	LIST_INIT(&tmp->tm_nodes_used);
+
+	tmp->tm_pages_max = pages;
+	tmp->tm_pages_used = 0;
+	tmp->tm_ino_unr = new_unrhdr(2, INT_MAX, &tmp->allnode_lock);
+	tmp->tm_dirent_pool = uma_zcreate("TMPFS dirent",
+	    sizeof(struct tmpfs_dirent),
+	    NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+	tmp->tm_node_pool = uma_zcreate("TMPFS node",
+	    sizeof(struct tmpfs_node),
+	    tmpfs_node_ctor, tmpfs_node_dtor,
+	    tmpfs_node_init, tmpfs_node_fini,
+	    UMA_ALIGN_PTR, 0);
+
+	/* Allocate the root node; on failure undo everything built above. */
+	error = tmpfs_alloc_node(tmp, VDIR, root_uid,
+	    root_gid, root_mode & ALLPERMS, NULL, NULL,
+	    VNOVAL, td, &root);
+	if (error != 0 || root == NULL) {
+		uma_zdestroy(tmp->tm_node_pool);
+		uma_zdestroy(tmp->tm_dirent_pool);
+		delete_unrhdr(tmp->tm_ino_unr);
+		mtx_destroy(&tmp->allnode_lock);	/* pairs with mtx_init */
+		free(tmp, M_TMPFSMNT);
+		return (error != 0 ? error : ENOMEM);	/* never 0 here */
+	}
+	KASSERT(root->tn_id == 2, ("tmpfs root with invalid ino: %d", root->tn_id));
+	tmp->tm_root = root;
+
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_MPSAFE;
+	MNT_IUNLOCK(mp);
+
+	mp->mnt_data = tmp;
+	mp->mnt_stat.f_namemax = MAXNAMLEN;
+	vfs_getnewfsid(mp);
+	vfs_mountedfrom(mp, "tmpfs");
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+/* Unmount: flush the vnodes, free all nodes and dirents, drop the mount. */
+/* ARGSUSED2 */
+static int
+tmpfs_unmount(struct mount *mp, int mntflags, struct thread *l)
+{
+	int error;
+	int flags = 0;
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *node;
+
+	/* Handle forced unmounts. */
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/* Finalize all pending I/O. */
+	error = vflush(mp, 0, flags, l);
+	if (error != 0)
+		return error;
+
+	tmp = VFS_TO_TMPFS(mp);
+
+	/* Free all associated data.  The loop iterates over the linked list
+	 * we have containing all used nodes.  For each of them that is
+	 * a directory, we free all its directory entries.
+	 * NOTE(review): the old remark about an "available list" looks stale;
+	 * nothing below walks one -- the UMA zones are simply destroyed. */
+	node = LIST_FIRST(&tmp->tm_nodes_used);
+	while (node != NULL) {
+		struct tmpfs_node *next;
+
+		if (node->tn_type == VDIR) {
+			struct tmpfs_dirent *de;
+
+			de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
+			while (de != NULL) {
+				struct tmpfs_dirent *nde;
+				/* Save the successor before de is freed. */
+				nde = TAILQ_NEXT(de, td_entries);
+				tmpfs_free_dirent(tmp, de, FALSE);
+				de = nde;
+				node->tn_size -= sizeof(struct tmpfs_dirent);
+			}
+		}
+		/* Likewise, fetch the next node before this one is freed. */
+		next = LIST_NEXT(node, tn_entries);
+		tmpfs_free_node(tmp, node);
+		node = next;
+	}
+	/* The list is empty now; release the zones and the tm_ino_unr header. */
+	uma_zdestroy(tmp->tm_dirent_pool);
+	uma_zdestroy(tmp->tm_node_pool);
+	delete_unrhdr(tmp->tm_ino_unr);
+
+	mtx_destroy(&tmp->allnode_lock);
+	MPASS(tmp->tm_pages_used == 0);
+	MPASS(tmp->tm_nodes_inuse == 0);
+
+	/* Throw away the tmpfs_mount structure. */
+	free(mp->mnt_data, M_TMPFSMNT);
+	mp->mnt_data = NULL;
+
+	MNT_ILOCK(mp);
+	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+/* Returns the vnode for the mount's root node in *vpp, flagged VV_ROOT. */
+static int
+tmpfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
+{
+	int rc;
+
+	rc = tmpfs_alloc_vp(mp, VFS_TO_TMPFS(mp)->tm_root, flags, vpp, td);
+	if (rc != 0)
+		return (rc);
+	(*vpp)->v_vflag |= VV_ROOT;
+	return (0);
+}
+
+/* --------------------------------------------------------------------- */
+/* Maps file handle fhp to its node and returns that node's vnode in *vpp. */
+static int
+tmpfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
+{
+	boolean_t found;
+	struct tmpfs_fid *tfhp;
+	struct tmpfs_mount *tmp;
+	struct tmpfs_node *node;
+
+	tmp = VFS_TO_TMPFS(mp);
+	/* Reject handles with the wrong length or an out-of-range node id. */
+	tfhp = (struct tmpfs_fid *)fhp;
+	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
+		return EINVAL;
+
+	if (tfhp->tf_id >= tmp->tm_nodes_max)
+		return EINVAL;
+
+	found = FALSE;
+	/* Search the in-use list for a node matching both id and generation. */
+	TMPFS_LOCK(tmp);
+	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
+		if (node->tn_id == tfhp->tf_id &&
+		    node->tn_gen == tfhp->tf_gen) {
+			found = TRUE;
+			break;
+		}
+	}
+	TMPFS_UNLOCK(tmp);
+	/* NOTE(review): node is used after the unlock -- confirm its lifetime. */
+	if (found)
+		return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp, curthread));
+
+	return (EINVAL);
+}
+
+/* --------------------------------------------------------------------- */
+/* Fills sbp with the mount's page-based block counts and node counts. */
+/* ARGSUSED2 */
+static int
+tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *l)
+{
+	fsfilcnt_t freenodes;
+	struct tmpfs_mount *tmp;
+
+	tmp = VFS_TO_TMPFS(mp);
+	/* Blocks are pages: both the block and the I/O size are PAGE_SIZE. */
+	sbp->f_iosize = PAGE_SIZE;
+	sbp->f_bsize = PAGE_SIZE;
+
+	sbp->f_blocks = TMPFS_PAGES_MAX(tmp);
+	sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp);
+	/* Free nodes: bounded by the node limit and by the pages still free. */
+	freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_inuse,
+	    TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node));
+
+	sbp->f_files = freenodes + tmp->tm_nodes_inuse;
+	sbp->f_ffree = freenodes;
+	/* sbp->f_owner = tmp->tn_uid; */
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * tmpfs vfs operations: the VFS entry points implemented in this file.
+ */
+
+struct vfsops tmpfs_vfsops = {
+	.vfs_mount =			tmpfs_mount,
+	.vfs_unmount =			tmpfs_unmount,
+	.vfs_root =			tmpfs_root,
+	.vfs_statfs =			tmpfs_statfs,
+	.vfs_fhtovp =			tmpfs_fhtovp,
+};
+VFS_SET(tmpfs_vfsops, tmpfs, 0);
--- /dev/null
+++ sys/fs/tmpfs/tmpfs_vnops.h
@@ -0,0 +1,64 @@
+/*	$NetBSD: tmpfs_vnops.h,v 1.7 2005/12/03 17:34:44 christos Exp $	*/
+
+/*
+ * Copyright (c) 2005 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
+ * 2005 program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/fs/tmpfs/tmpfs_vnops.h,v 1.2 2007/06/28 02:36:41 delphij Exp $
+ */
+
+#ifndef _FS_TMPFS_TMPFS_VNOPS_H_
+#define _FS_TMPFS_TMPFS_VNOPS_H_
+
+#if !defined(_KERNEL)
+#error not supposed to be exposed to userland.
+#endif
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Declarations for tmpfs_vnops.c.
+ */
+
+extern struct vop_vector tmpfs_vnodeop_entries;
+
+vop_access_t	tmpfs_access;
+vop_getattr_t	tmpfs_getattr;
+vop_setattr_t	tmpfs_setattr;
+vop_reclaim_t	tmpfs_reclaim;
+
+/* --------------------------------------------------------------------- */
+
+#endif /* _FS_TMPFS_TMPFS_VNOPS_H_ */
--- /dev/null
+++ sys/fs/tmpfs/tmpfs_fifoops.h
@@ -0,0 +1,60 @@
+/*	$NetBSD: tmpfs_fifoops.h,v 1.4 2005/12/03 17:34:44 christos Exp $	*/
+
+/*
+ * Copyright (c) 2005 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
+ * 2005 program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/fs/tmpfs/tmpfs_fifoops.h,v 1.3 2007/06/28 02:36:41 delphij Exp $
+ */
+
+#ifndef _FS_TMPFS_TMPFS_FIFOOPS_H_
+#define _FS_TMPFS_TMPFS_FIFOOPS_H_
+
+#if !defined(_KERNEL)
+#error not supposed to be exposed to userland.
+#endif
+
+#include <fs/tmpfs/tmpfs_vnops.h>
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Declarations for tmpfs_fifoops.c.
+ */
+
+extern struct vop_vector tmpfs_fifoop_entries;
+
+/* --------------------------------------------------------------------- */
+#endif /* _FS_TMPFS_TMPFS_FIFOOPS_H_ */
Index: udf_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/udf/udf_vnops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/udf/udf_vnops.c -L sys/fs/udf/udf_vnops.c -u -r1.2 -r1.3
--- sys/fs/udf/udf_vnops.c
+++ sys/fs/udf/udf_vnops.c
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/udf/udf_vnops.c,v 1.58.2.1 2006/03/12 21:50:02 scottl Exp $
+ * $FreeBSD: src/sys/fs/udf/udf_vnops.c,v 1.66 2007/06/11 20:14:44 remko Exp $
  */
 
 /* udf_vnops.c */
@@ -57,6 +57,7 @@
 
 static vop_access_t	udf_access;
 static vop_getattr_t	udf_getattr;
+static vop_open_t	udf_open;
 static vop_ioctl_t	udf_ioctl;
 static vop_pathconf_t	udf_pathconf;
 static vop_read_t	udf_read;
@@ -66,6 +67,7 @@
 static vop_bmap_t	udf_bmap;
 static vop_cachedlookup_t	udf_lookup;
 static vop_reclaim_t	udf_reclaim;
+static vop_vptofh_t	udf_vptofh;
 static int udf_readatoffset(struct udf_node *node, int *size, off_t offset,
     struct buf **bp, uint8_t **data);
 static int udf_bmap_internal(struct udf_node *node, off_t offset,
@@ -80,16 +82,18 @@
 	.vop_getattr =		udf_getattr,
 	.vop_ioctl =		udf_ioctl,
 	.vop_lookup =		vfs_cache_lookup,
+	.vop_open =		udf_open,
 	.vop_pathconf =		udf_pathconf,
 	.vop_read =		udf_read,
 	.vop_readdir =		udf_readdir,
 	.vop_readlink =		udf_readlink,
 	.vop_reclaim =		udf_reclaim,
 	.vop_strategy =		udf_strategy,
+	.vop_vptofh =		udf_vptofh,
 };
 
-MALLOC_DEFINE(M_UDFFID, "UDF FID", "UDF FileId structure");
-MALLOC_DEFINE(M_UDFDS, "UDF DS", "UDF Dirstream structure");
+MALLOC_DEFINE(M_UDFFID, "udf_fid", "UDF FileId structure");
+MALLOC_DEFINE(M_UDFDS, "udf_ds", "UDF Dirstream structure");
 
 #define UDF_INVALID_BMAP	-1
 
@@ -159,6 +163,16 @@
 	    a_mode, a->a_cred, NULL));
 }
 
+/* Open: calls vnode_create_vobject() with the file entry's inf_len. */
+static int
+udf_open(struct vop_open_args *ap)
+{
+	off_t len = le64toh(VTON(ap->a_vp)->fentry->inf_len);
+
+	vnode_create_vobject(ap->a_vp, len, ap->a_td);
+	return (0);
+}
+
 static int mon_lens[2][12] = {
 	{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
 	{31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
@@ -327,38 +341,60 @@
 	}
 }
 
+#define lblkno(udfmp, loc)	((loc) >> (udfmp)->bshift)	/* offset -> block */
+#define blkoff(udfmp, loc)	((loc) & (udfmp)->bmask)	/* offset in block */
+#define lblktosize(udfmp, blk)	((blk) << (udfmp)->bshift)	/* block -> bytes */
+
 static int
-udf_read(struct vop_read_args *a)
+udf_read(struct vop_read_args *ap)
 {
-	struct vnode *vp = a->a_vp;
-	struct uio *uio = a->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
 	struct udf_node *node = VTON(vp);
+	struct udf_mnt *udfmp;
 	struct buf *bp;
-	uint8_t *data;
-	off_t fsize, offset;
+	daddr_t lbn, rablock;
+	off_t diff, fsize;
 	int error = 0;
-	int size;
+	long size, n, on;
 
+	if (uio->uio_resid == 0)
+		return (0);
 	if (uio->uio_offset < 0)
 		return (EINVAL);
-
 	fsize = le64toh(node->fentry->inf_len);
-
-	while (uio->uio_offset < fsize && uio->uio_resid > 0) {
-		offset = uio->uio_offset;
-		if (uio->uio_resid + offset <= fsize)
-			size = uio->uio_resid;
-		else
-			size = fsize - offset;
-		error = udf_readatoffset(node, &size, offset, &bp, &data);
-		if (error == 0)
-			error = uiomove(data, size, uio);
-		if (bp != NULL)
+	udfmp = node->udfmp;
+	do {
+		lbn = lblkno(udfmp, uio->uio_offset);
+		on = blkoff(udfmp, uio->uio_offset);
+		n = min((u_int)(udfmp->bsize - on),
+			uio->uio_resid);
+		diff = fsize - uio->uio_offset;
+		if (diff <= 0)
+			return (0);
+		if (diff < n)
+			n = diff;
+		size = udfmp->bsize;
+		rablock = lbn + 1;
+		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+			if (lblktosize(udfmp, rablock) < fsize) {
+				error = cluster_read(vp, fsize, lbn, size, NOCRED,
+					uio->uio_resid, (ap->a_ioflag >> 16), &bp);
+			} else {
+				error = bread(vp, lbn, size, NOCRED, &bp);
+			}
+		} else {
+			error = bread(vp, lbn, size, NOCRED, &bp);
+		}
+		n = min(n, size - bp->b_resid);
+		if (error) {
 			brelse(bp);
-		if (error)
-			break;
-	};
+			return (error);
+		}
 
+		error = uiomove(bp->b_data + on, (int)n, uio);
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n != 0);
 	return (error);
 }
 
@@ -764,23 +800,29 @@
 	struct vnode *vp;
 	struct udf_node *node;
 	int maxsize;
+	daddr_t sector;
 	struct bufobj *bo;
+	int multiplier;
 
 	bp = a->a_bp;
 	vp = a->a_vp;
 	node = VTON(vp);
 
-	/* cd9660 has this test reversed, but it seems more logical this way */
-	if (bp->b_blkno != bp->b_lblkno) {
+	if (bp->b_blkno == bp->b_lblkno) {
 		/*
 		 * Files that are embedded in the fentry don't translate well
 		 * to a block number.  Reject.
 		 */
 		if (udf_bmap_internal(node, bp->b_lblkno * node->udfmp->bsize,
-		    &bp->b_lblkno, &maxsize)) {
+		    &sector, &maxsize)) {
 			clrbuf(bp);
 			bp->b_blkno = -1;
 		}
+
+		/* bmap gives sector numbers, bio works with device blocks */
+		multiplier = node->udfmp->bsize / DEV_BSIZE;
+		bp->b_blkno = sector * multiplier;
+
 	}
 	if ((long)bp->b_blkno == -1) {
 		bufdone(bp);
@@ -860,7 +902,7 @@
 	 * If this is a LOOKUP and we've already partially searched through
 	 * the directory, pick up where we left off and flag that the
 	 * directory may need to be searched twice.  For a full description,
-	 * see /sys/isofs/cd9660/cd9660_lookup.c:cd9660_lookup()
+	 * see /sys/fs/cd9660/cd9660_lookup.c:cd9660_lookup()
 	 */
 	if (nameiop != LOOKUP || node->diroff == 0 || node->diroff > fsize) {
 		offset = 0;
@@ -982,6 +1024,20 @@
 	return (0);
 }
 
+/* Builds a file handle for the vnode from the udf node's hash_id. */
+static int
+udf_vptofh(struct vop_vptofh_args *a)
+{
+	struct udf_node *unode = VTON(a->a_vp);
+	struct ifid *fh = (struct ifid *)a->a_fhp;
+
+	/* Only ifid_len and ifid_ino are filled in here. */
+	fh->ifid_len = sizeof(*fh);
+	fh->ifid_ino = unode->hash_id;
+
+	return (0);
+}
+
 /*
  * Read the block and then set the data pointer to correspond with the
  * offset passed in.  Only read in at most 'size' bytes, and then set 'size'
@@ -1024,20 +1080,22 @@
 		*size = max_size;
 	*size = min(*size, MAXBSIZE);
 
-	if ((error = udf_readlblks(udfmp, sector, *size, bp))) {
+	if ((error = udf_readlblks(udfmp, sector, *size + (offset & udfmp->bmask), bp))) {
 		printf("warning: udf_readlblks returned error %d\n", error);
 		/* note: *bp may be non-NULL */
 		return (error);
 	}
 
 	bp1 = *bp;
-	*data = (uint8_t *)&bp1->b_data[offset % udfmp->bsize];
+	*data = (uint8_t *)&bp1->b_data[offset & udfmp->bmask];
 	return (0);
 }
 
 /*
  * Translate a file offset into a logical block and then into a physical
  * block.
+ * max_size - maximum number of bytes that can be read starting from given
+ * offset, rather than beginning of calculated sector number
  */
 static int
 udf_bmap_internal(struct udf_node *node, off_t offset, daddr_t *sector,
@@ -1090,9 +1148,9 @@
 		} while(offset >= icblen);
 
 		lsector = (offset  >> udfmp->bshift) +
-		    ((struct short_ad *)(icb))->pos;
+		    le32toh(((struct short_ad *)(icb))->pos);
 
-		*max_size = GETICBLEN(short_ad, icb);
+		*max_size = icblen - offset;
 
 		break;
 	case 1:
@@ -1117,7 +1175,7 @@
 		lsector = (offset >> udfmp->bshift) +
 		    le32toh(((struct long_ad *)(icb))->loc.lb_num);
 
-		*max_size = GETICBLEN(long_ad, icb);
+		*max_size = icblen - offset;
 
 		break;
 	case 3:
Index: udf_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/udf/udf_vfsops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/udf/udf_vfsops.c -L sys/fs/udf/udf_vfsops.c -u -r1.2 -r1.3
--- sys/fs/udf/udf_vfsops.c
+++ sys/fs/udf/udf_vfsops.c
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/udf/udf_vfsops.c,v 1.33.2.2 2006/02/20 00:53:13 yar Exp $
+ * $FreeBSD: src/sys/fs/udf/udf_vfsops.c,v 1.48.4.1 2008/01/28 12:51:31 kib Exp $
  */
 
 /* udf_vfsops.c */
@@ -84,6 +84,7 @@
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/vnode.h>
@@ -99,8 +100,8 @@
 #include <fs/udf/udf.h>
 #include <fs/udf/udf_mount.h>
 
-static MALLOC_DEFINE(M_UDFMOUNT, "UDF mount", "UDF mount structure");
-MALLOC_DEFINE(M_UDFFENTRY, "UDF fentry", "UDF file entry structure");
+static MALLOC_DEFINE(M_UDFMOUNT, "udf_mount", "UDF mount structure");
+MALLOC_DEFINE(M_UDFFENTRY, "udf_fentry", "UDF file entry structure");
 
 struct iconv_functions *udf_iconv = NULL;
 
@@ -116,7 +117,6 @@
 static vfs_statfs_t    udf_statfs;
 static vfs_unmount_t   udf_unmount;
 static vfs_fhtovp_t	udf_fhtovp;
-static vfs_vptofh_t	udf_vptofh;
 
 static int udf_find_partmaps(struct udf_mnt *, struct logvol_desc *);
 
@@ -129,7 +129,6 @@
 	.vfs_uninit =		udf_uninit,
 	.vfs_unmount =		udf_unmount,
 	.vfs_vget =		udf_vget,
-	.vfs_vptofh =		udf_vptofh,
 };
 VFS_SET(udf_vfsops, udf, VFCF_READONLY);
 
@@ -191,7 +190,6 @@
 {
 	struct vnode *devvp;	/* vnode of the mount device */
 	struct udf_mnt *imp = 0;
-	struct export_args *export;
 	struct vfsoptlist *opts;
 	char *fspec, *cs_disk, *cs_local;
 	int error, len, *udf_flags;
@@ -199,8 +197,12 @@
 
 	opts = mp->mnt_optnew;
 
-	if ((mp->mnt_flag & MNT_RDONLY) == 0)
-		return (EROFS);
+	/*
+	 * Unconditionally mount as read-only.
+	 */
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_RDONLY;
+	MNT_IUNLOCK(mp);
 
 	/*
 	 * No root filesystem support.  Probably not a big deal, since the
@@ -215,14 +217,7 @@
 		return (EINVAL);
 
 	if (mp->mnt_flag & MNT_UPDATE) {
-		imp = VFSTOUDFFS(mp);
-		if (fspec == NULL) {
-			error = vfs_getopt(opts, "export", (void **)&export,
-			    &len);
-			if (error || len != sizeof(struct export_args))
-				return (EINVAL);
-			return (vfs_export(mp, export));
-		}
+		return (0);
 	}
 
 	/* Check that the mount device exists */
@@ -242,7 +237,7 @@
 	/* Check the access rights on the mount device */
 	error = VOP_ACCESS(devvp, VREAD, td->td_ucred, td);
 	if (error)
-		error = suser(td);
+		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 	if (error) {
 		vput(devvp);
 		return (error);
@@ -292,7 +287,7 @@
 
 	itag = (uint8_t *)tag;
 
-	if (tag->id != id)
+	if (le16toh(tag->id) != id)
 		return (EINVAL);
 
 	for (i = 0; i < 15; i++)
@@ -315,6 +310,7 @@
 	struct fileset_desc *fsd;
 	struct file_entry *root_fentry;
 	uint32_t sector, size, mvds_start, mvds_end;
+	uint32_t logical_secsize;
 	uint32_t fsd_offset = 0;
 	uint16_t part_num = 0, fsd_part = 0;
 	int error = EINVAL;
@@ -346,7 +342,9 @@
 	mp->mnt_data = (qaddr_t)udfmp;
 	mp->mnt_stat.f_fsid.val[0] = dev2udev(devvp->v_rdev);
 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 	udfmp->im_mountp = mp;
 	udfmp->im_dev = devvp->v_rdev;
 	udfmp->im_devvp = devvp;
@@ -357,16 +355,31 @@
 #if 0
 	udfmp->im_l2d = NULL;
 #endif
+	/*
+	 * The UDF specification defines a logical sectorsize of 2048
+	 * for DVD media.
+	 */
+	logical_secsize = 2048;
 
-	bsize = 2048;	/* XXX Should probe the media for it's size */
+	if (((logical_secsize % cp->provider->sectorsize) != 0) ||
+	    (logical_secsize < cp->provider->sectorsize)) {
+		DROP_GIANT();
+		g_topology_lock();
+		g_vfs_close(cp, td);
+		g_topology_unlock();
+		PICKUP_GIANT();
+		return (EINVAL);
+	}
+
+	bsize = cp->provider->sectorsize;
 
 	/* 
 	 * Get the Anchor Volume Descriptor Pointer from sector 256.
 	 * XXX Should also check sector n - 256, n, and 512.
 	 */
 	sector = 256;
-	if ((error = bread(devvp, sector * btodb(bsize), bsize, NOCRED,
-			   &bp)) != 0)
+	if ((error = bread(devvp, sector * btodb(logical_secsize), bsize,
+			   NOCRED, &bp)) != 0)
 		goto bail;
 	if ((error = udf_checktag((struct desc_tag *)bp->b_data, TAGID_ANCHOR)))
 		goto bail;
@@ -384,8 +397,8 @@
 	mvds_start = le32toh(avdp.main_vds_ex.loc);
 	mvds_end = mvds_start + (le32toh(avdp.main_vds_ex.len) - 1) / bsize;
 	for (sector = mvds_start; sector < mvds_end; sector++) {
-		if ((error = bread(devvp, sector * btodb(bsize), bsize, 
-				   NOCRED, &bp)) != 0) {
+		if ((error = bread(devvp, sector * btodb(logical_secsize),
+				   bsize, NOCRED, &bp)) != 0) {
 			printf("Can't read sector %d of VDS\n", sector);
 			goto bail;
 		}
@@ -522,7 +535,9 @@
 	FREE(udfmp, M_UDFMOUNT);
 
 	mp->mnt_data = (qaddr_t)0;
+	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
 
 	return (0);
 }
@@ -598,7 +613,13 @@
 	unode->udfmp = udfmp;
 	vp->v_data = unode;
 
-	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL, td);
+	error = insmntque(vp, mp);
+	if (error != 0) {
+		uma_zfree(udf_zone_node, unode);
+		return (error);
+	}
+	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
 	if (error || *vpp != NULL)
 		return (error);
 
@@ -609,6 +630,7 @@
 	devvp = udfmp->im_devvp;
 	if ((error = RDSECTOR(devvp, sector, udfmp->bsize, &bp)) != 0) {
 		printf("Cannot read sector %d\n", sector);
+		vgone(vp);
 		vput(vp);
 		brelse(bp);
 		*vpp = NULL;
@@ -618,6 +640,7 @@
 	fe = (struct file_entry *)bp->b_data;
 	if (udf_checktag(&fe->tag, TAGID_FENTRY)) {
 		printf("Invalid file entry!\n");
+		vgone(vp);
 		vput(vp);
 		brelse(bp);
 		*vpp = NULL;
@@ -628,6 +651,7 @@
 	    M_NOWAIT | M_ZERO);
 	if (unode->fentry == NULL) {
 		printf("Cannot allocate file entry block\n");
+		vgone(vp);
 		vput(vp);
 		brelse(bp);
 		*vpp = NULL;
@@ -670,18 +694,13 @@
 	return (0);
 }
 
-struct ifid {
-	u_short	ifid_len;
-	u_short	ifid_pad;
-	int	ifid_ino;
-	long	ifid_start;
-};
-
 static int
 udf_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
 {
 	struct ifid *ifhp;
 	struct vnode *nvp;
+	struct udf_node *np;
+	off_t fsize;
 	int error;
 
 	ifhp = (struct ifid *)fhp;
@@ -691,52 +710,42 @@
 		return (error);
 	}
 
-	*vpp = nvp;
-	vnode_create_vobject_off(*vpp, 0, curthread);
-	return (0);
-}
-
-static int
-udf_vptofh (struct vnode *vp, struct fid *fhp)
-{
-	struct udf_node *node;
-	struct ifid *ifhp;
-
-	node = VTON(vp);
-	ifhp = (struct ifid *)fhp;
-	ifhp->ifid_len = sizeof(struct ifid);
-	ifhp->ifid_ino = node->hash_id;
+	np = VTON(nvp);
+	fsize = le64toh(np->fentry->inf_len);
 
+	*vpp = nvp;
+	vnode_create_vobject(*vpp, fsize, curthread);
 	return (0);
 }
 
 static int
 udf_find_partmaps(struct udf_mnt *udfmp, struct logvol_desc *lvd)
 {
-	union udf_pmap *pmap;
 	struct part_map_spare *pms;
 	struct regid *pmap_id;
 	struct buf *bp;
 	unsigned char regid_id[UDF_REGID_ID_SIZE + 1];
 	int i, k, ptype, psize, error;
+	uint8_t *pmap = (uint8_t *) &lvd->maps[0];
 
 	for (i = 0; i < le32toh(lvd->n_pm); i++) {
-		pmap = (union udf_pmap *)&lvd->maps[i * UDF_PMAP_SIZE];
-		ptype = pmap->data[0];
-		psize = pmap->data[1];
+		ptype = pmap[0];
+		psize = pmap[1];
 		if (((ptype != 1) && (ptype != 2)) ||
-		    ((psize != UDF_PMAP_SIZE) && (psize != 6))) {
+		    ((psize != UDF_PMAP_TYPE1_SIZE) &&
+		     (psize != UDF_PMAP_TYPE2_SIZE))) {
 			printf("Invalid partition map found\n");
 			return (1);
 		}
 
 		if (ptype == 1) {
 			/* Type 1 map.  We don't care */
+			pmap += UDF_PMAP_TYPE1_SIZE;
 			continue;
 		}
 
 		/* Type 2 map.  Gotta find out the details */
-		pmap_id = (struct regid *)&pmap->data[4];
+		pmap_id = (struct regid *)&pmap[4];
 		bzero(&regid_id[0], UDF_REGID_ID_SIZE);
 		bcopy(&pmap_id->id[0], &regid_id[0], UDF_REGID_ID_SIZE);
 
@@ -746,7 +755,8 @@
 			return (1);
 		}
 
-		pms = &pmap->pms;
+		pms = (struct part_map_spare *)pmap;
+		pmap += UDF_PMAP_TYPE2_SIZE;
 		MALLOC(udfmp->s_table, struct udf_sparing_table *,
 		    le32toh(pms->st_size), M_UDFMOUNT, M_NOWAIT | M_ZERO);
 		if (udfmp->s_table == NULL)
Index: osta.h
===================================================================
RCS file: /home/cvs/src/sys/fs/udf/osta.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/udf/osta.h -L sys/fs/udf/osta.h -u -r1.1.1.1 -r1.2
--- sys/fs/udf/osta.h
+++ sys/fs/udf/osta.h
@@ -1,7 +1,22 @@
 /*
  * Prototypes for the OSTA functions
  *
- * $FreeBSD: src/sys/fs/udf/osta.h,v 1.2 2003/11/05 06:55:23 scottl Exp $
+ * $FreeBSD: src/sys/fs/udf/osta.h,v 1.3 2006/09/12 19:02:34 imp Exp $
+ */
+
+/*-
+ **********************************************************************
+ * OSTA compliant Unicode compression, uncompression routines.
+ * Copyright 1995 Micro Design International, Inc.
+ * Written by Jason M. Rinn.
+ * Micro Design International gives permission for the free use of the
+ * following source code.
+ */
+
+/*
+ * Various routines from the OSTA 2.01 specs.  Copyrights are included with
+ * each code segment.  Slight whitespace modifications have been made for
+ * formatting purposes.  Typos/bugs have been fixed.
  */
 
 #ifndef UNIX
Index: ecma167-udf.h
===================================================================
RCS file: /home/cvs/src/sys/fs/udf/ecma167-udf.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/udf/ecma167-udf.h -L sys/fs/udf/ecma167-udf.h -u -r1.1.1.1 -r1.2
--- sys/fs/udf/ecma167-udf.h
+++ sys/fs/udf/ecma167-udf.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/udf/ecma167-udf.h,v 1.5 2005/04/14 14:40:09 brueffer Exp $
+ * $FreeBSD: src/sys/fs/udf/ecma167-udf.h,v 1.7 2006/07/25 14:15:50 yar Exp $
  */
 
 /* ecma167-udf.h */
@@ -201,8 +201,6 @@
 	uint8_t			maps[1];
 } __packed;
 
-#define	UDF_PMAP_SIZE	64
-
 /* Type 1 Partition Map [3/10.7.2] */
 struct part_map_1 {
 	uint8_t			type;
@@ -211,6 +209,8 @@
 	uint16_t		part_num;
 } __packed;
 
+#define	UDF_PMAP_TYPE1_SIZE	6
+
 /* Type 2 Partition Map [3/10.7.3] */
 struct part_map_2 {
 	uint8_t			type;
@@ -218,6 +218,8 @@
 	uint8_t			part_id[62];
 } __packed;
 
+#define	UDF_PMAP_TYPE2_SIZE	64
+
 /* Virtual Partition Map [UDF 2.01/2.2.8] */
 struct part_map_virt {
 	uint8_t			type;
@@ -245,7 +247,6 @@
 } __packed;
 
 union udf_pmap {
-	uint8_t			data[UDF_PMAP_SIZE];
 	struct part_map_1	pm1;
 	struct part_map_2	pm2;
 	struct part_map_virt	pmv;
@@ -370,4 +371,4 @@
 #define	GETICB(ad_type, fentry, offset)	\
 	(struct ad_type *)&fentry->data[offset]
 
-#define	GETICBLEN(ad_type, icb)	((struct ad_type *)(icb))->len
+#define	GETICBLEN(ad_type, icb)	le32toh(((struct ad_type *)(icb))->len)
Index: udf.h
===================================================================
RCS file: /home/cvs/src/sys/fs/udf/udf.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/udf/udf.h -L sys/fs/udf/udf.h -u -r1.1.1.1 -r1.2
--- sys/fs/udf/udf.h
+++ sys/fs/udf/udf.h
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/fs/udf/udf.h,v 1.13 2005/03/16 08:09:52 phk Exp $
+ * $FreeBSD: src/sys/fs/udf/udf.h,v 1.14 2007/02/15 22:08:33 pjd Exp $
  */
 
 #define UDF_HASHTBLSIZE 100
@@ -74,6 +74,13 @@
 	int		fid_fragment;
 };
 
+struct ifid {
+	u_short	ifid_len;
+	u_short	ifid_pad;
+	int	ifid_ino;
+	long	ifid_start;
+};
+
 #define	VFSTOUDFFS(mp)	((struct udf_mnt *)((mp)->mnt_data))
 #define	VTON(vp)	((struct udf_node *)((vp)->v_data))
 
--- /dev/null
+++ sys/fs/cd9660/cd9660_rrip.h
@@ -0,0 +1,137 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.h	8.2 (Berkeley) 12/5/94
+ * $FreeBSD: src/sys/fs/cd9660/cd9660_rrip.h,v 1.9 2007/02/11 13:54:25 rodrigc Exp $
+ */
+
+typedef struct {
+	char   type			[ISODCL (  0,    1)];
+	u_char length			[ISODCL (  2,    2)]; /* 711 */
+	u_char version			[ISODCL (  3,    3)];
+} ISO_SUSP_HEADER;	/* embedded as member `h' of the ISO_RRIP_* entries below */
+/* Attribute entry: mode, link count, uid and gid, each marked as a 733 field. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char mode			[ISODCL (  4,   11)]; /* 733 */
+	char links			[ISODCL ( 12,   19)]; /* 733 */
+	char uid			[ISODCL ( 20,   27)]; /* 733 */
+	char gid			[ISODCL ( 28,   35)]; /* 733 */
+} ISO_RRIP_ATTR;
+/* Device entry: the device number is kept as separate high and low parts. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dev_t_high			[ISODCL (  4,   11)]; /* 733 */
+	char dev_t_low			[ISODCL ( 12,   19)]; /* 733 */
+} ISO_RRIP_DEVICE;
+/* NOTE(review): presumably bits of ISO_RRIP_SLINK_COMPONENT.cflag -- confirm. */
+#define	ISO_SUSP_CFLAG_CONTINUE	0x01
+#define	ISO_SUSP_CFLAG_CURRENT	0x02
+#define	ISO_SUSP_CFLAG_PARENT	0x04
+#define	ISO_SUSP_CFLAG_ROOT	0x08
+#define	ISO_SUSP_CFLAG_VOLROOT	0x10
+#define	ISO_SUSP_CFLAG_HOST	0x20
+/* One symbolic-link component; name[1] looks like a variable-length tail (XXX). */
+typedef struct {
+	u_char cflag			[ISODCL (  1,    1)];
+	u_char clen			[ISODCL (  2,    2)];
+	u_char name			[1];			/* XXX */
+} ISO_RRIP_SLINK_COMPONENT;
+#define	ISO_RRIP_SLSIZ	2	/* NOTE(review): presumably cflag + clen bytes -- confirm */
+/* Symbolic-link entry; `component' presumably marks where components start. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,	 4)];
+	u_char component		[ISODCL (  5,	 5)];
+} ISO_RRIP_SLINK;
+/* Alternate-name entry; only the flags field is declared here. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char flags			[ISODCL (  4,	 4)];
+} ISO_RRIP_ALTNAME;
+/* CLINK entry: carries a directory location (dir_loc) marked as a 733 field. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,	 11)]; /* 733 */
+} ISO_RRIP_CLINK;
+/* PLINK entry: same layout as ISO_RRIP_CLINK. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,	 11)]; /* 733 */
+} ISO_RRIP_PLINK;
+/* RELDIR entry: header only, no payload fields. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+} ISO_RRIP_RELDIR;
+/* NOTE(review): presumably bits of ISO_RRIP_TSTAMP.flags -- confirm. */
+#define	ISO_SUSP_TSTAMP_FORM17	0x80
+#define	ISO_SUSP_TSTAMP_FORM7	0x00
+#define	ISO_SUSP_TSTAMP_CREAT	0x01
+#define	ISO_SUSP_TSTAMP_MODIFY	0x02
+#define	ISO_SUSP_TSTAMP_ACCESS	0x04
+#define	ISO_SUSP_TSTAMP_ATTR	0x08
+#define	ISO_SUSP_TSTAMP_BACKUP	0x10
+#define	ISO_SUSP_TSTAMP_EXPIRE	0x20
+#define	ISO_SUSP_TSTAMP_EFFECT	0x40
+/* Timestamp entry: a flags field, then `time' (presumably start of the data). */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,    4)];
+	u_char time			[ISODCL (  5,    5)];
+} ISO_RRIP_TSTAMP;
+/* IDFLAG entry: a single flags field after the header. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,    4)];
+} ISO_RRIP_IDFLAG;
+/* EXTREF entry: len_id, len_des and len_src fields plus a version field. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char len_id			[ISODCL (  4,	 4)];
+	char len_des			[ISODCL (  5,	 5)];
+	char len_src			[ISODCL (  6,	 6)];
+	char version			[ISODCL (  7,	 7)];
+} ISO_RRIP_EXTREF;
+/* OFFSET entry: `check' and `skip' fields. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char check			[ISODCL (  4,	 5)];
+	char skip			[ISODCL (  6,	 6)];
+} ISO_RRIP_OFFSET;
+/* CONT entry: location, offset and length (presumably of a continuation area). */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char location			[ISODCL (  4,	11)];
+	char offset			[ISODCL ( 12,	19)];
+	char length			[ISODCL ( 20,	27)];
+} ISO_RRIP_CONT;
--- /dev/null
+++ sys/fs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,823 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vfsops.c	8.18 (Berkeley) 5/22/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_vfsops.c,v 1.150 2007/03/13 01:50:22 tegge Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/cdio.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+#include <sys/syslog.h>
+#include <sys/iconv.h>
+
+#include <fs/cd9660/iso.h>
+#include <fs/cd9660/iso_rrip.h>
+#include <fs/cd9660/cd9660_node.h>
+#include <fs/cd9660/cd9660_mount.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+MALLOC_DEFINE(M_ISOFSMNT, "isofs_mount", "ISOFS mount structure");
+MALLOC_DEFINE(M_ISOFSNODE, "isofs_node", "ISOFS vnode private part");
+
+struct iconv_functions *cd9660_iconv = NULL;
+
+static vfs_mount_t	cd9660_mount;
+static vfs_cmount_t	cd9660_cmount;
+static vfs_unmount_t	cd9660_unmount;
+static vfs_root_t	cd9660_root;
+static vfs_statfs_t	cd9660_statfs;
+static vfs_vget_t	cd9660_vget;
+static vfs_fhtovp_t	cd9660_fhtovp;
+
+static struct vfsops cd9660_vfsops = {	/* members not named here stay zero */
+	.vfs_fhtovp =		cd9660_fhtovp,
+	.vfs_mount =		cd9660_mount,
+	.vfs_cmount =		cd9660_cmount,
+	.vfs_root =		cd9660_root,
+	.vfs_statfs =		cd9660_statfs,
+	.vfs_unmount =		cd9660_unmount,
+	.vfs_vget =		cd9660_vget,
+};
+VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY);	/* NOTE(review): presumably registers the fs as read-only -- confirm */
+MODULE_VERSION(cd9660, 1);
+
+static int iso_mountfs(struct vnode *devvp, struct mount *mp,
+		       struct thread *td);
+
+/*
+ * VFS Operations.
+ */
+
+static int
+cd9660_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
+{
+	struct iso_args oargs;
+	int rv;
+
+	/* Copy in the caller-supplied struct iso_args. */
+	if ((rv = copyin(data, &oargs, sizeof(oargs))) != 0)
+		return (rv);
+
+	/* Re-express each field of the structure as a named mount option. */
+	ma = mount_argsu(ma, "from", oargs.fspec, MAXPATHLEN);
+	ma = mount_arg(ma, "export", &oargs.export, sizeof(oargs.export));
+	ma = mount_argsu(ma, "cs_disk", oargs.cs_disk, 64);
+	ma = mount_argsu(ma, "cs_local", oargs.cs_local, 64);
+	ma = mount_argf(ma, "ssector", "%u", oargs.ssector);
+	ma = mount_argb(ma, (oargs.flags & ISOFSMNT_NORRIP) == 0, "norrip");
+	ma = mount_argb(ma, oargs.flags & ISOFSMNT_GENS, "nogens");
+	ma = mount_argb(ma, oargs.flags & ISOFSMNT_EXTATT, "noextatt");
+	ma = mount_argb(ma, (oargs.flags & ISOFSMNT_NOJOLIET) == 0, "nojoliet");
+	ma = mount_argb(ma, oargs.flags & ISOFSMNT_BROKENJOLIET,
+	    "nobrokenjoliet");
+	ma = mount_argb(ma, oargs.flags & ISOFSMNT_KICONV, "nokiconv");
+
+	/* Hand the assembled option list to kernel_mount(). */
+	return (kernel_mount(ma, flags));
+}
+
+static int
+cd9660_mount(struct mount *mp, struct thread *td)
+{
+	struct nameidata nd;
+	struct iso_mnt *isomp;
+	struct vnode *devvp;
+	char *fspec;
+	int error;
+
+	/* Unconditionally mark the mount read-only. */
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_RDONLY;
+	MNT_IUNLOCK(mp);
+
+	/* The "from" option names the device to mount. */
+	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
+	if (error != 0)
+		return (error);
+
+	isomp = VFSTOISOFS(mp);
+
+	/* An update carrying the "export" option is finished at this point. */
+	if ((mp->mnt_flag & MNT_UPDATE) != 0 &&
+	    vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
+		return (0);
+
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	devvp = nd.ni_vp;
+
+	if (!vn_isdisk(devvp, &error)) {
+		vrele(devvp);
+		return (error);
+	}
+
+	/*
+	 * The caller needs read access to the device, or failing that
+	 * the PRIV_VFS_MOUNT_PERM privilege.
+	 */
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
+	error = VOP_ACCESS(devvp, VREAD, td->td_ucred, td);
+	if (error != 0)
+		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+	if (error != 0) {
+		vput(devvp);
+		return (error);
+	}
+	VOP_UNLOCK(devvp, 0, td);
+
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/* An update must name the device that is already mounted. */
+		if (devvp == isomp->im_devvp)
+			vrele(devvp);
+		else
+			error = EINVAL;	/* needs translation */
+	} else
+		error = iso_mountfs(devvp, mp, td);
+	if (error != 0) {
+		vrele(devvp);
+		return (error);
+	}
+
+	vfs_mountedfrom(mp, fspec);
+	return (0);
+}
+
+/*
+ * Common code for mount and mountroot: open devvp through GEOM, scan the
+ * volume descriptors and attach a struct iso_mnt to mp.  Returns 0 or errno.
+ */
+static int
+iso_mountfs(struct vnode *devvp, struct mount *mp, struct thread *td)
+{
+	struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	struct buf *pribp = NULL, *supbp = NULL;
+	struct cdev *dev = devvp->v_rdev;
+	int error = EINVAL;
+	int high_sierra = 0;
+	int iso_bsize;
+	int iso_blknum;
+	int joliet_level;
+	struct iso_volume_descriptor *vdp = 0;
+	struct iso_primary_descriptor *pri = NULL;
+	struct iso_sierra_primary_descriptor *pri_sierra = NULL;
+	struct iso_supplementary_descriptor *sup = NULL;
+	struct iso_directory_record *rootp;
+	int logical_block_size, ssector;
+	struct g_consumer *cp;
+	struct bufobj *bo;
+	char *cs_local, *cs_disk;
+
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
+	DROP_GIANT();
+	g_topology_lock();
+	error = g_vfs_open(devvp, &cp, "cd9660", 0);
+	g_topology_unlock();
+	PICKUP_GIANT();
+	VOP_UNLOCK(devvp, 0, td);
+	if (error)
+		return error;
+	if (devvp->v_rdev->si_iosize_max != 0)
+		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
+	if (mp->mnt_iosize_max > MAXPHYS)
+		mp->mnt_iosize_max = MAXPHYS;
+
+	bo = &devvp->v_bufobj;
+	bo->bo_private = cp;
+	bo->bo_ops = g_vfs_bufops;
+
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.
+	 */
+	if ((ISO_DEFAULT_BLOCK_SIZE % cp->provider->sectorsize) != 0) {
+		DROP_GIANT();
+		g_topology_lock();
+		g_vfs_close(cp, td);
+		g_topology_unlock();
+		PICKUP_GIANT();
+		return (EINVAL);
+	}
+
+	iso_bsize = cp->provider->sectorsize;
+
+	joliet_level = 0;
+	if (1 != vfs_scanopt(mp->mnt_optnew, "ssector", "%d", &ssector))
+		ssector = 0;
+	for (iso_blknum = 16 + ssector;
+	     iso_blknum < 100 + ssector;
+	     iso_blknum++) {
+		if ((error = bread(devvp, iso_blknum * btodb(ISO_DEFAULT_BLOCK_SIZE),
+				  iso_bsize, NOCRED, &bp)) != 0)
+			goto out;
+
+		vdp = (struct iso_volume_descriptor *)bp->b_data;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
+			if (bcmp (vdp->id_sierra, ISO_SIERRA_ID,
+				  sizeof vdp->id) != 0) {
+				error = EINVAL;
+				goto out;
+			} else
+				high_sierra = 1;
+		}
+		switch (isonum_711 (high_sierra? vdp->type_sierra: vdp->type)){
+		case ISO_VD_PRIMARY:
+			if (pribp == NULL) {
+				pribp = bp;
+				bp = NULL;
+				pri = (struct iso_primary_descriptor *)vdp;
+				pri_sierra =
+				  (struct iso_sierra_primary_descriptor *)vdp;
+			}
+			break;
+
+		case ISO_VD_SUPPLEMENTARY:
+			if (supbp == NULL) {
+				supbp = bp;
+				bp = NULL;
+				sup = (struct iso_supplementary_descriptor *)vdp;
+
+				if (!vfs_flagopt(mp->mnt_optnew, "nojoliet", NULL, 0)) {
+					if (bcmp(sup->escape, "%/@", 3) == 0)
+						joliet_level = 1;
+					if (bcmp(sup->escape, "%/C", 3) == 0)
+						joliet_level = 2;
+					if (bcmp(sup->escape, "%/E", 3) == 0)
+						joliet_level = 3;
+
+					if ((isonum_711 (sup->flags) & 1) &&
+					    !vfs_flagopt(mp->mnt_optnew, "brokenjoliet", NULL, 0))
+						joliet_level = 0;
+				}
+			}
+			break;
+
+		case ISO_VD_END:
+			goto vd_end;
+
+		default:
+			break;
+		}
+		if (bp) {
+			brelse(bp);
+			bp = NULL;
+		}
+	}
+ vd_end:
+	if (bp) {
+		brelse(bp);
+		bp = NULL;
+	}
+
+	if (pri == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	logical_block_size =
+		isonum_723 (high_sierra?
+			    pri_sierra->logical_block_size:
+			    pri->logical_block_size);
+
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	rootp = (struct iso_directory_record *)
+		(high_sierra?
+		 pri_sierra->root_directory_record:
+		 pri->root_directory_record);
+
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK | M_ZERO);
+	isomp->im_cp = cp;
+	isomp->im_bo = bo;
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size =
+		isonum_733 (high_sierra?
+			    pri_sierra->volume_space_size:
+			    pri->volume_space_size);
+	isomp->joliet_level = 0;
+	/*
+	 * Since an ISO9660 multi-session CD can also access previous
+	 * sessions, we have to include them into the space consider-
+	 * ations.  This doesn't yield a very accurate number since
+	 * parts of the old sessions might be inaccessible now, but we
+	 * can't do much better.  This is also important for the NFS
+	 * filehandle validation.
+	 */
+	isomp->volume_space_size += ssector;
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = ffs(logical_block_size) - 1;
+
+	pribp->b_flags |= B_AGE;
+	brelse(pribp);
+	pribp = NULL;
+
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
+	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+	mp->mnt_maxsymlinklen = 0;
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_LOCAL;
+	MNT_IUNLOCK(mp);
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+
+	vfs_flagopt(mp->mnt_optnew, "norrip", &isomp->im_flags, ISOFSMNT_NORRIP);
+	vfs_flagopt(mp->mnt_optnew, "gens", &isomp->im_flags, ISOFSMNT_GENS);
+	vfs_flagopt(mp->mnt_optnew, "extatt", &isomp->im_flags, ISOFSMNT_EXTATT);
+	vfs_flagopt(mp->mnt_optnew, "nojoliet", &isomp->im_flags, ISOFSMNT_NOJOLIET);
+	vfs_flagopt(mp->mnt_optnew, "kiconv", &isomp->im_flags, ISOFSMNT_KICONV);
+
+	/* Check the Rock Ridge Extension support */
+	if (!(isomp->im_flags & ISOFSMNT_NORRIP)) {
+		/* pribp was released above, so read the copy kept in isomp->root. */
+		rootp = (struct iso_directory_record *)isomp->root;
+		if ((error = bread(isomp->im_devvp,
+				  (isomp->root_extent + isonum_711(rootp->ext_attr_length)) <<
+				  (isomp->im_bshift - DEV_BSHIFT),
+				  isomp->logical_block_size, NOCRED, &bp)) != 0)
+		    goto out;
+
+		rootp = (struct iso_directory_record *)bp->b_data;
+
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+		    isomp->im_flags |= ISOFSMNT_NORRIP;
+		} else {
+		    isomp->im_flags &= ~ISOFSMNT_GENS;
+		}
+
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+
+	if (isomp->im_flags & ISOFSMNT_KICONV && cd9660_iconv) {
+		cs_local = vfs_getopts(mp->mnt_optnew, "cs_local", &error);
+		if (error)
+			goto out;
+		cs_disk = vfs_getopts(mp->mnt_optnew, "cs_disk", &error);
+		if (error)
+			goto out;
+		cd9660_iconv->open(cs_local, cs_disk, &isomp->im_d2l);
+		cd9660_iconv->open(cs_disk, cs_local, &isomp->im_l2d);
+	} else {
+		isomp->im_d2l = NULL;
+		isomp->im_l2d = NULL;
+	}
+
+	if (high_sierra) {
+		/* this effectively ignores all the mount flags */
+		if (bootverbose)
+			log(LOG_INFO, "cd9660: High Sierra Format\n");
+		isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA;
+	} else
+		switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+		  default:
+			  isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+			  break;
+		  case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+			  isomp->iso_ftype = ISO_FTYPE_9660;
+			  break;
+		  case 0:
+			  if (bootverbose)
+			  	  log(LOG_INFO, "cd9660: RockRidge Extension\n");
+			  isomp->iso_ftype = ISO_FTYPE_RRIP;
+			  break;
+		}
+
+	/* Decide whether to use the Joliet descriptor */
+
+	if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level) {
+		if (bootverbose)
+			log(LOG_INFO, "cd9660: Joliet Extension (Level %d)\n",
+			    joliet_level);
+		rootp = (struct iso_directory_record *)
+			sup->root_directory_record;
+		bcopy (rootp, isomp->root, sizeof isomp->root);
+		isomp->root_extent = isonum_733 (rootp->extent);
+		isomp->root_size = isonum_733 (rootp->size);
+		isomp->joliet_level = joliet_level;
+		supbp->b_flags |= B_AGE;
+	}
+
+	if (supbp) {
+		brelse(supbp);
+		supbp = NULL;
+	}
+
+	return 0;
+out:
+	if (bp)
+		brelse(bp);
+	if (pribp)
+		brelse(pribp);
+	if (supbp)
+		brelse(supbp);
+	if (cp != NULL) {
+		DROP_GIANT();
+		g_topology_lock();
+		g_vfs_close(cp, td);
+		g_topology_unlock();
+		PICKUP_GIANT();
+	}
+	if (isomp) {
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Unmount: flush vnodes, close iconv handles and the GEOM consumer,
+ * release the device vnode and free the per-mount iso_mnt.
+ */
+static int
+cd9660_unmount(struct mount *mp, int mntflags, struct thread *td)
+{
+	struct iso_mnt *imp;
+	int error, vflags;
+
+	/* MNT_FORCE is passed on to vflush() as FORCECLOSE. */
+	vflags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp))
+		return EBUSY;
+#endif
+	error = vflush(mp, 0, vflags, td);
+	if (error != 0)
+		return (error);
+
+	imp = VFSTOISOFS(mp);
+	/* Conversion handles are closed only when the mount used kiconv. */
+	if ((imp->im_flags & ISOFSMNT_KICONV) != 0 && cd9660_iconv != NULL) {
+		if (imp->im_d2l != NULL)
+			cd9660_iconv->close(imp->im_d2l);
+		if (imp->im_l2d != NULL)
+			cd9660_iconv->close(imp->im_l2d);
+	}
+	/* Close the GEOM consumer under the topology lock, Giant dropped. */
+	DROP_GIANT();
+	g_topology_lock();
+	g_vfs_close(imp->im_cp, td);
+	g_topology_unlock();
+	PICKUP_GIANT();
+	vrele(imp->im_devvp);
+	free((caddr_t)imp, M_ISOFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	MNT_ILOCK(mp);
+	mp->mnt_flag &= ~MNT_LOCAL;
+	MNT_IUNLOCK(mp);
+	return (0);
+}
+
+/*
+ * Return the root vnode of the filesystem in *vpp (LK_EXCLUSIVE is used;
+ * the caller's flags are not consulted).
+ */
+static int
+cd9660_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
+{
+	struct iso_directory_record *rootrec;
+	struct iso_mnt *isomp;
+	ino_t ino;
+
+	isomp = VFSTOISOFS(mp);
+	rootrec = (struct iso_directory_record *)isomp->root;
+	ino = isodirino(rootrec, isomp);
+
+	/*
+	 * With RRIP the `.' entry of the root directory must be used, so
+	 * the lookup is told to treat the root as a relocated directory.
+	 */
+	return (cd9660_vget_internal(mp, ino, LK_EXCLUSIVE, vpp,
+	    isomp->iso_ftype == ISO_FTYPE_RRIP, rootrec));
+}
+
+/*
+ * Get filesystem statistics.
+ *
+ * Block size and block count come from the mounted volume's iso_mnt;
+ * the remaining counters are filled in with constants.
+ */
+static int
+cd9660_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
+{
+	struct iso_mnt *imp = VFSTOISOFS(mp);
+
+	sbp->f_bsize = imp->logical_block_size;
+	sbp->f_iosize = sbp->f_bsize;	/* XXX */
+	sbp->f_blocks = imp->volume_space_size;
+
+	/* Free-space and file counters are all reported as zero. */
+	sbp->f_bfree = 0;	/* total free blocks */
+	sbp->f_bavail = 0;	/* blocks free for non superuser */
+	sbp->f_files = 0;	/* total files */
+	sbp->f_ffree = 0;	/* free file nodes */
+	return (0);
+}
+
+/*
+ * File handle to vnode
+ *
+ * The inode number stored in the handle is resolved with VFS_VGET()
+ * using LK_EXCLUSIVE.  A node whose iso_mode is 0 is treated as stale
+ * and yields ESTALE.  On failure *vpp is set to NULLVP; on success
+ * vnode_create_vobject() is called with the node's i_size.
+ */
+
+/* ARGSUSED */
+static int
+cd9660_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
+{
+	struct ifid *handle = (struct ifid *)fhp;
+	struct iso_node *node;
+	struct vnode *vp;
+	int rv;
+
+#ifdef	ISOFS_DBG
+	printf("fhtovp: ino %d, start %ld\n",
+	       handle->ifid_ino, handle->ifid_start);
+#endif
+
+	rv = VFS_VGET(mp, handle->ifid_ino, LK_EXCLUSIVE, &vp);
+	if (rv != 0) {
+		*vpp = NULLVP;
+		return (rv);
+	}
+
+	node = VTOI(vp);
+	if (node->inode.iso_mode == 0) {
+		/* A zero mode marks the handle as stale. */
+		vput(vp);
+		*vpp = NULLVP;
+		return (ESTALE);
+	}
+
+	*vpp = vp;
+	vnode_create_vobject(vp, node->i_size, curthread);
+	return (0);
+}
+
+/*
+ * vfs_vget entry point: look up the vnode for inode number ino.
+ * Thin wrapper around cd9660_vget_internal() with no directory record.
+ */
+static int
+cd9660_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
+{
+	int relocated;
+
+	/*
+	 * XXXX
+	 * The RRIP-based choice of the `relocated' flag is compiled out
+	 * below, so the flag is currently always passed as 0.
+	 */
+#if 0
+	relocated = (VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP);
+#else
+	relocated = 0;
+#endif
+	return (cd9660_vget_internal(mp, ino, flags, vpp, relocated,
+	    (struct iso_directory_record *)0));
+}
+/* Look up or build the vnode for ino: 0 with *vpp set on success, else errno. */
+int
+cd9660_vget_internal(mp, ino, flags, vpp, relocated, isodir)
+	struct mount *mp;
+	ino_t ino;
+	int flags;
+	struct vnode **vpp;
+	int relocated;
+	struct iso_directory_record *isodir;
+{
+	struct iso_mnt *imp;
+	struct iso_node *ip;
+	struct buf *bp;
+	struct vnode *vp;
+	struct cdev *dev;
+	int error;
+	struct thread *td;
+	/* Done early if vfs_hash_get() fails or hands back an existing vnode. */
+	td = curthread;
+	error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
+	if (error || *vpp != NULL)
+		return (error);
+
+	imp = VFSTOISOFS(mp);
+	dev = imp->im_dev;
+
+	/* Allocate a new vnode/iso_node. */
+	if ((error = getnewvnode("isofs", mp, &cd9660_vnodeops, &vp)) != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+	MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE,
+	    M_WAITOK | M_ZERO);
+	vp->v_data = ip;
+	ip->i_vnode = vp;
+	ip->i_number = ino;
+	/* Take the vnode lock exclusively, then insmntque() and hash the vnode. */
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL, td);
+	error = insmntque(vp, mp);
+	if (error != 0) {
+		free(ip, M_ISOFSNODE);
+		*vpp = NULLVP;
+		return (error);
+	}
+	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
+	if (error || *vpp != NULL)
+		return (error);
+	/* No record supplied: read it from the block/offset derived from ino. */
+	if (isodir == 0) {
+		int lbn, off;
+
+		lbn = lblkno(imp, ino);
+		if (lbn >= imp->volume_space_size) {
+			vput(vp);
+			printf("fhtovp: lbn exceed volume space %d\n", lbn);
+			return (ESTALE);
+		}
+
+		off = blkoff(imp, ino);
+		if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+			vput(vp);
+			printf("fhtovp: crosses block boundary %d\n",
+			       off + ISO_DIRECTORY_RECORD_SIZE);
+			return (ESTALE);
+		}
+
+		error = bread(imp->im_devvp,
+			      lbn << (imp->im_bshift - DEV_BSHIFT),
+			      imp->logical_block_size, NOCRED, &bp);
+		if (error) {
+			vput(vp);
+			brelse(bp);
+			printf("fhtovp: bread error %d\n",error);
+			return (error);
+		}
+		isodir = (struct iso_directory_record *)(bp->b_data + off);
+		/* The record's own length must also fit inside the block. */
+		if (off + isonum_711(isodir->length) >
+		    imp->logical_block_size) {
+			vput(vp);
+			if (bp != 0)
+				brelse(bp);
+			printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+			       off +isonum_711(isodir->length), off,
+			       isonum_711(isodir->length));
+			return (ESTALE);
+		}
+
+#if 0
+		if (isonum_733(isodir->extent) +
+		    isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) {
+			if (bp != 0)
+				brelse(bp);
+			printf("fhtovp: file start miss %d vs %d\n",
+			       isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length),
+			       ifhp->ifid_start);
+			return (ESTALE);
+		}
+#endif
+	} else
+		bp = 0;	/* caller supplied the record; no buffer to release later */
+
+	ip->i_mnt = imp;
+	VREF(imp->im_devvp);
+
+	if (relocated) {
+		/*
+		 * On relocated directories we must
+		 * read the `.' entry out of a dir.
+		 */
+		ip->iso_start = ino >> imp->im_bshift;
+		if (bp != 0)
+			brelse(bp);
+		if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) {
+			vput(vp);
+			return (error);
+		}
+		isodir = (struct iso_directory_record *)bp->b_data;
+	}
+	/* Extent, size and data start are decoded from the directory record. */
+	ip->iso_extent = isonum_733(isodir->extent);
+	ip->i_size = isonum_733(isodir->size);
+	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+
+	/*
+	 * Setup time stamp, attribute
+	 */
+	vp->v_type = VNON;
+	switch (imp->iso_ftype) {
+	default:	/* ISO_FTYPE_9660 */
+	    {
+		struct buf *bp2;
+		int off;
+		if ((imp->im_flags & ISOFSMNT_EXTATT)
+		    && (off = isonum_711(isodir->ext_attr_length)))
+			cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL,
+				     &bp2);
+		else
+			bp2 = NULL;
+		cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660);
+		cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660);
+		if (bp2)
+			brelse(bp2);
+		break;
+	    }
+	case ISO_FTYPE_RRIP:
+		cd9660_rrip_analyze(isodir, ip, imp);
+		break;
+	}
+
+	if (bp != 0)
+		brelse(bp);
+
+	/*
+	 * Initialize the associated vnode
+	 */
+	switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) {
+	case VFIFO:
+		vp->v_op = &cd9660_fifoops;
+		break;
+	default:
+		break;
+	}
+	/* A node whose extent equals the mount's root extent is flagged VV_ROOT. */
+	if (ip->iso_extent == imp->root_extent)
+		vp->v_vflag |= VV_ROOT;
+
+	/*
+	 * XXX need generation number?
+	 */
+
+	*vpp = vp;
+	return (0);
+}
--- /dev/null
+++ sys/fs/cd9660/cd9660_vnops.c
@@ -0,0 +1,862 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vnops.c	8.19 (Berkeley) 5/27/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_vnops.c,v 1.113 2007/02/15 22:08:34 pjd Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <fs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/unistd.h>
+#include <sys/filio.h>
+
+#include <vm/vm.h>
+#include <vm/vnode_pager.h>
+#include <vm/uma.h>
+
+#include <fs/cd9660/iso.h>
+#include <fs/cd9660/cd9660_node.h>
+#include <fs/cd9660/iso_rrip.h>
+
+static vop_setattr_t	cd9660_setattr;
+static vop_open_t	cd9660_open;
+static vop_access_t	cd9660_access;
+static vop_getattr_t	cd9660_getattr;
+static vop_ioctl_t	cd9660_ioctl;
+static vop_pathconf_t	cd9660_pathconf;
+static vop_read_t	cd9660_read;
+struct isoreaddir;
+static int iso_uiodir(struct isoreaddir *idp, struct dirent *dp, off_t off);
+static int iso_shipdir(struct isoreaddir *idp);
+static vop_readdir_t	cd9660_readdir;
+static vop_readlink_t	cd9660_readlink;
+static vop_strategy_t	cd9660_strategy;
+static vop_vptofh_t	cd9660_vptofh;
+
+/*
+ * Setattr call.
+ *
+ * Any request to change flags, owner, group, access/modification time or
+ * mode is refused with EROFS.  A size change is refused with EISDIR on a
+ * directory and with EROFS on a symlink or regular file; on every other
+ * vnode type it is accepted and ignored.  Requests that change nothing
+ * return 0.
+ */
+static int
+cd9660_setattr(struct vop_setattr_args *ap)
+{
+	struct vattr *attrs = ap->a_vap;
+	struct vnode *node = ap->a_vp;
+
+	/* None of these attributes can be modified here. */
+	if (attrs->va_flags != (u_long)VNOVAL ||
+	    attrs->va_uid != (uid_t)VNOVAL ||
+	    attrs->va_gid != (gid_t)VNOVAL ||
+	    attrs->va_atime.tv_sec != VNOVAL ||
+	    attrs->va_mtime.tv_sec != VNOVAL ||
+	    attrs->va_mode != (mode_t)VNOVAL)
+		return (EROFS);
+
+	/* No size change requested: nothing left to check. */
+	if (attrs->va_size == (u_quad_t)VNOVAL)
+		return (0);
+
+	if (node->v_type == VDIR)
+		return (EISDIR);
+	if (node->v_type == VLNK || node->v_type == VREG)
+		return (EROFS);
+
+	/* Devices, sockets, fifos and the remaining types: ignore. */
+	return (0);
+}
+
+/*
+ * Check whether the credential in ap->a_cred may access the vnode in the
+ * way described by ap->a_mode.
+ *
+ * Character and block devices are not handled here (EOPNOTSUPP).  Write
+ * access to directories, symlinks and regular files is always refused
+ * with EROFS.  Everything else is decided by vaccess() from the mode,
+ * uid and gid stored in the iso_node.
+ */
+/* ARGSUSED */
+static int
+cd9660_access(struct vop_access_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+	struct iso_node *isop = VTOI(node);
+	mode_t wanted = ap->a_mode;
+
+	if (node->v_type == VBLK || node->v_type == VCHR)
+		return (EOPNOTSUPP);
+
+	/*
+	 * Writes are only let through to vaccess() for objects that do
+	 * not store their data on this filesystem (sockets, fifos, ...).
+	 */
+	if ((wanted & VWRITE) != 0 &&
+	    (node->v_type == VDIR || node->v_type == VLNK ||
+	    node->v_type == VREG))
+		return (EROFS);
+
+	return (vaccess(node->v_type, isop->inode.iso_mode,
+	    isop->inode.iso_uid, isop->inode.iso_gid, ap->a_mode,
+	    ap->a_cred, NULL));
+}
+
+/*
+ * Open a vnode: hand the file size recorded in the iso_node to
+ * vnode_create_vobject().  Always returns 0.
+ */
+static int
+cd9660_open(struct vop_open_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+
+	vnode_create_vobject(node, VTOI(node)->i_size, ap->a_td);
+	return (0);
+}
+
+
+/*
+ * Fill in *ap->a_vap from the in-core iso_node.  Always returns 0.
+ *
+ * A symlink whose recorded size is zero has its size computed by
+ * actually reading the link target into a temporary kernel buffer; if
+ * that read fails the size stays zero.
+ */
+static int
+cd9660_getattr(struct vop_getattr_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+	struct vattr *out = ap->a_vap;
+	struct iso_node *isop = VTOI(node);
+
+	out->va_fsid = dev2udev(isop->i_mnt->im_dev);
+	out->va_fileid = isop->i_number;
+
+	out->va_mode = isop->inode.iso_mode;
+	out->va_nlink = isop->inode.iso_links;
+	out->va_uid = isop->inode.iso_uid;
+	out->va_gid = isop->inode.iso_gid;
+	out->va_atime = isop->inode.iso_atime;
+	out->va_mtime = isop->inode.iso_mtime;
+	out->va_ctime = isop->inode.iso_ctime;
+	out->va_rdev = isop->inode.iso_rdev;
+
+	out->va_size = (u_quad_t) isop->i_size;
+	if (isop->i_size == 0 && (out->va_mode & S_IFMT) == S_IFLNK) {
+		struct vop_readlink_args rl;
+		struct iovec iov;
+		struct uio io;
+		char *target;
+
+		/* Scratch buffer large enough for any link target. */
+		MALLOC(target, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
+
+		iov.iov_base = target;
+		iov.iov_len = MAXPATHLEN;
+
+		io.uio_iov = &iov;
+		io.uio_iovcnt = 1;
+		io.uio_offset = 0;
+		io.uio_rw = UIO_READ;
+		io.uio_segflg = UIO_SYSSPACE;
+		io.uio_td = ap->a_td;
+		io.uio_resid = MAXPATHLEN;
+
+		rl.a_uio = &io;
+		rl.a_vp = ap->a_vp;
+		rl.a_cred = ap->a_cred;
+
+		/* Whatever was consumed from the uio is the link length. */
+		if (cd9660_readlink(&rl) == 0)
+			out->va_size = MAXPATHLEN - io.uio_resid;
+		FREE(target, M_TEMP);
+	}
+
+	out->va_flags = 0;
+	out->va_gen = 1;
+	out->va_blocksize = isop->i_mnt->logical_block_size;
+	out->va_bytes = (u_quad_t) isop->i_size;
+	out->va_type = node->v_type;
+	out->va_filerev = 0;
+	return (0);
+}
+
+/*
+ * Vnode op for ioctl.
+ *
+ * The only command understood is FIOGETLBA, which stores the node's
+ * iso_start value as an int in the caller's data buffer.  Character and
+ * block devices get EOPNOTSUPP; any other command gets ENOTTY.
+ */
+static int
+cd9660_ioctl(struct vop_ioctl_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+	struct iso_node *isop = VTOI(node);
+
+	if (node->v_type == VBLK || node->v_type == VCHR)
+		return (EOPNOTSUPP);
+
+	if (ap->a_command != FIOGETLBA)
+		return (ENOTTY);
+
+	*(int *)(ap->a_data) = isop->iso_start;
+	return (0);
+}
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies file data to ap->a_uio one logical block at a time until the
+ * request is satisfied, end of file is reached or an error occurs.
+ * Returns EOPNOTSUPP for character/block devices, EINVAL for a negative
+ * offset, otherwise 0 or the error from the buffer read / uiomove().
+ */
+static int
+cd9660_read(struct vop_read_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+	struct uio *io = ap->a_uio;
+	struct iso_node *isop = VTOI(node);
+	struct iso_mnt *mnt;
+	struct buf *blk;
+	daddr_t cur_lbn, next_lbn;
+	off_t left_in_file;
+	int ra_size, err = 0;
+	int seqcount;
+	long blk_size, chunk, blk_off;
+
+	if (node->v_type == VBLK || node->v_type == VCHR)
+		return (EOPNOTSUPP);
+
+	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
+
+	if (io->uio_resid == 0)
+		return (0);
+	if (io->uio_offset < 0)
+		return (EINVAL);
+
+	isop->i_flag |= IN_ACCESS;
+	mnt = isop->i_mnt;
+
+	for (;;) {
+		/* Locate the block and the span wanted inside it. */
+		cur_lbn = lblkno(mnt, io->uio_offset);
+		blk_off = blkoff(mnt, io->uio_offset);
+		chunk = min((u_int)(mnt->logical_block_size - blk_off),
+			io->uio_resid);
+
+		/* Never hand out bytes past the end of the file. */
+		left_in_file = (off_t)isop->i_size - io->uio_offset;
+		if (left_in_file <= 0)
+			return (0);
+		if (left_in_file < chunk)
+			chunk = left_in_file;
+
+		blk_size = blksize(mnt, isop, cur_lbn);
+		next_lbn = cur_lbn + 1;
+
+		if ((node->v_mount->mnt_flag & MNT_NOCLUSTERR) != 0) {
+			/* Clustering disabled: at most one block read-ahead. */
+			if (seqcount > 1 &&
+			    lblktosize(mnt, next_lbn) < isop->i_size) {
+				ra_size = blksize(mnt, isop, next_lbn);
+				err = breadn(node, cur_lbn, blk_size,
+					     &next_lbn, &ra_size, 1, NOCRED,
+					     &blk);
+			} else
+				err = bread(node, cur_lbn, blk_size, NOCRED,
+					    &blk);
+		} else {
+			/* Cluster only when another block follows this one. */
+			if (lblktosize(mnt, next_lbn) < isop->i_size)
+				err = cluster_read(node, (off_t)isop->i_size,
+					 cur_lbn, blk_size, NOCRED,
+					 io->uio_resid,
+					 (ap->a_ioflag >> 16), &blk);
+			else
+				err = bread(node, cur_lbn, blk_size, NOCRED,
+					    &blk);
+		}
+
+		/* Do not copy more than the buffer actually holds. */
+		chunk = min(chunk, blk_size - blk->b_resid);
+		if (err) {
+			brelse(blk);
+			return (err);
+		}
+
+		err = uiomove(blk->b_data + blk_off, (int)chunk, io);
+		brelse(blk);
+
+		if (err != 0 || io->uio_resid <= 0 || chunk == 0)
+			break;
+	}
+	return (err);
+}
+
+/*
+ * Structure for reading directories
+ *
+ * Working state shared by cd9660_readdir(), iso_shipdir() and
+ * iso_uiodir() for the duration of one readdir call.
+ */
+struct isoreaddir {
+	struct dirent saveent;	/* entry parked by iso_shipdir (plain name) */
+	struct dirent assocent;	/* entry parked by iso_shipdir (ASSOCCHAR name) */
+	struct dirent current;	/* entry being built from the current record */
+	off_t saveoff;		/* curroff value recorded with saveent */
+	off_t assocoff;		/* curroff value recorded with assocent */
+	off_t curroff;		/* directory offset; advanced past each record */
+	struct uio *uio;	/* destination for emitted entries */
+	off_t uio_off;		/* offset passed with the last entry copied out */
+	int eofflag;		/* starts at 1; cleared when output space runs out */
+	u_long *cookies;	/* next free cookie slot, or NULL if not wanted */
+	int ncookies;		/* cookie slots still available */
+};
+
+/*
+ * Copy one finished directory entry to the caller's uio.
+ *
+ * The name is NUL-terminated and d_reclen recomputed before the copy.
+ * `off' is recorded as the entry's cookie (when cookies are collected)
+ * and, after a successful copy, in idp->uio_off.
+ *
+ * Returns 0 on success, the uiomove() error on a failed copy, or -1
+ * (with idp->eofflag cleared) when the uio or the cookie array has no
+ * room left for this entry.
+ */
+static int
+iso_uiodir(struct isoreaddir *idp, struct dirent *dp, off_t off)
+{
+	int error;
+
+	dp->d_name[dp->d_namlen] = 0;
+	dp->d_reclen = GENERIC_DIRSIZ(dp);
+
+	/* Out of buffer space or out of cookie slots: stop, not at EOF. */
+	if (idp->uio->uio_resid < dp->d_reclen ||
+	    (idp->cookies != NULL && idp->ncookies <= 0)) {
+		idp->eofflag = 0;
+		return (-1);
+	}
+
+	if (idp->cookies != NULL) {
+		*idp->cookies = off;
+		idp->cookies++;
+		idp->ncookies--;
+	}
+
+	error = uiomove(dp, dp->d_reclen, idp->uio);
+	if (error == 0)
+		idp->uio_off = off;
+	return (error);
+}
+
+/*
+ * Park idp->current and flush previously parked entries when the name
+ * changes.  cd9660_readdir() uses this for ISO_FTYPE_DEFAULT volumes
+ * instead of emitting each entry directly.
+ *
+ * An entry whose name is longer than one character and starts with
+ * ASSOCCHAR is treated as an "associated" entry and parked in
+ * idp->assocent; every other entry is parked in idp->saveent.  Before
+ * parking, the current name (without a leading ASSOCCHAR) is compared
+ * with the parked name; if they differ, the parked entries are written
+ * out through iso_uiodir(), assocent first, then saveent.
+ *
+ * Calling it with idp->current.d_namlen == 0 flushes whatever is still
+ * parked (cd9660_readdir() does this after its loop).
+ *
+ * Returns 0, or the non-zero result of iso_uiodir() (which may be -1
+ * when the output buffer is full).
+ */
+static int
+iso_shipdir(struct isoreaddir *idp)
+{
+	struct dirent *dp;
+	int cl, sl, assoc;
+	int error;
+	char *cname, *sname;
+
+	/* Current name, with the association marker stripped if present. */
+	cl = idp->current.d_namlen;
+	cname = idp->current.d_name;
+	assoc = (cl > 1) && (*cname == ASSOCCHAR);
+	if (assoc) {
+		cl--;
+		cname++;
+	}
+
+	/*
+	 * Parked name to compare against: saveent if it holds something,
+	 * otherwise assocent without its leading marker character.
+	 */
+	dp = &idp->saveent;
+	sname = dp->d_name;
+	if (!(sl = dp->d_namlen)) {
+		dp = &idp->assocent;
+		sname = dp->d_name + 1;
+		sl = dp->d_namlen - 1;
+	}
+	if (sl > 0) {
+		if (sl != cl
+		    || bcmp(sname,cname,sl)) {
+			/* Different file: emit what was parked. */
+			if (idp->assocent.d_namlen) {
+				if ((error = iso_uiodir(idp,&idp->assocent,idp->assocoff)) != 0)
+					return (error);
+				idp->assocent.d_namlen = 0;
+			}
+			if (idp->saveent.d_namlen) {
+				if ((error = iso_uiodir(idp,&idp->saveent,idp->saveoff)) != 0)
+					return (error);
+				idp->saveent.d_namlen = 0;
+			}
+		}
+	}
+
+	/* Park the current entry in the slot matching its kind. */
+	idp->current.d_reclen = GENERIC_DIRSIZ(&idp->current);
+	if (assoc) {
+		idp->assocoff = idp->curroff;
+		bcopy(&idp->current,&idp->assocent,idp->current.d_reclen);
+	} else {
+		idp->saveoff = idp->curroff;
+		bcopy(&idp->current,&idp->saveent,idp->current.d_reclen);
+	}
+	return (0);
+}
+
+/*
+ * Vnode op for readdir
+ *
+ * Walks the on-disk directory records starting at uio->uio_offset,
+ * converts each one to a struct dirent and copies it to ap->a_uio until
+ * the directory ends, the output buffer (or cookie array) is full, or a
+ * malformed record is found.
+ *
+ * On return uio->uio_offset holds the offset recorded with the last
+ * entry copied out (unchanged if none was).  When ap->a_ncookies is not
+ * NULL, a cookie array allocated from M_TEMP is returned through
+ * ap->a_cookies / ap->a_ncookies on success.  ap->a_eofflag, when not
+ * NULL, is set to 1 unless output space ran out.
+ *
+ * Returns 0 on success (including "buffer full"), EINVAL for an illegal
+ * directory record, or the error from reading a block / uiomove().
+ */
+static int
+cd9660_readdir(struct vop_readdir_args *ap)
+{
+	struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	struct vnode *vdp = ap->a_vp;
+	struct iso_node *dp;
+	struct iso_mnt *imp;
+	struct buf *bp = NULL;
+	struct iso_directory_record *ep;
+	int entryoffsetinblock;
+	doff_t endsearch;
+	u_long bmask;
+	int error = 0;
+	int reclen;
+	u_short namelen;
+	int ncookies = 0;
+	u_long *cookies = NULL;
+
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	bmask = imp->im_bmask;
+
+	MALLOC(idp, struct isoreaddir *, sizeof(*idp), M_TEMP, M_WAITOK);
+	idp->saveent.d_namlen = idp->assocent.d_namlen = 0;
+	/*
+	 * XXX
+	 * Is it worth trying to figure out the type?
+	 */
+	idp->saveent.d_type = idp->assocent.d_type = idp->current.d_type =
+	    DT_UNKNOWN;
+	idp->uio = uio;
+	if (ap->a_ncookies == NULL) {
+		idp->cookies = NULL;
+	} else {
+		/*
+		 * Guess the number of cookies needed.
+		 */
+		ncookies = uio->uio_resid / 16;
+		MALLOC(cookies, u_long *, ncookies * sizeof(u_long),
+		    M_TEMP, M_WAITOK);
+		idp->cookies = cookies;
+		idp->ncookies = ncookies;
+	}
+	idp->eofflag = 1;
+	idp->curroff = uio->uio_offset;
+	/*
+	 * idp is not zeroed by MALLOC.  Seed uio_off so that the offset
+	 * written back at the end is well defined even when no entry is
+	 * copied out.
+	 */
+	idp->uio_off = uio->uio_offset;
+
+	if ((entryoffsetinblock = idp->curroff & bmask) &&
+	    (error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp))) {
+		/* Nothing is handed back to the caller: release it all. */
+		if (cookies != NULL)
+			free(cookies, M_TEMP);
+		FREE(idp, M_TEMP);
+		return (error);
+	}
+	endsearch = dp->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if ((idp->curroff & bmask) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error =
+			    cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp)) != 0)
+				break;
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			((char *)bp->b_data + entryoffsetinblock);
+
+		reclen = isonum_711(ep->length);
+		if (reclen == 0) {
+			/* skip to next block, if any */
+			idp->curroff =
+			    (idp->curroff & ~bmask) + imp->logical_block_size;
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;
+			/* illegal directory, so stop looking */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711(ep->name_len);
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		/*
+		 * Flag bit 2 set: use the number computed by isodirino().
+		 * Otherwise use the byte position of this record.
+		 */
+		if (isonum_711(ep->flags)&2)
+			idp->current.d_fileno = isodirino(ep, imp);
+		else
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+				entryoffsetinblock;
+
+		idp->curroff += reclen;
+
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			cd9660_rrip_getname(ep,idp->current.d_name, &namelen,
+					   &idp->current.d_fileno,imp);
+			idp->current.d_namlen = (u_char)namelen;
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default: /* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 || ISO_FTYPE_HIGH_SIERRA*/
+			/*
+			 * Names consisting of the single byte 0 or 1 are
+			 * reported as "." and ".." respectively: preload
+			 * ".." and pick the length accordingly.
+			 */
+			strcpy(idp->current.d_name,"..");
+			if (idp->current.d_namlen == 1 && ep->name[0] == 0) {
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			} else if (idp->current.d_namlen == 1 && ep->name[0] == 1) {
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			} else {
+				isofntrans(ep->name,idp->current.d_namlen,
+					   idp->current.d_name, &namelen,
+					   imp->iso_ftype == ISO_FTYPE_9660,
+					   isonum_711(ep->flags)&4,
+					   imp->joliet_level,
+					   imp->im_flags,
+					   imp->im_d2l);
+				idp->current.d_namlen = (u_char)namelen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	/* Flush entries still parked by iso_shipdir(). */
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		idp->current.d_namlen = 0;
+		error = iso_shipdir(idp);
+	}
+	/* A negative value only means "output full", not a failure. */
+	if (error < 0)
+		error = 0;
+
+	if (ap->a_ncookies != NULL) {
+		if (error)
+			free(cookies, M_TEMP);
+		else {
+			/*
+			 * Work out the number of cookies actually used.
+			 */
+			*ap->a_ncookies = ncookies - idp->ncookies;
+			*ap->a_cookies = cookies;
+		}
+	}
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+	/* The caller may not be interested in the EOF indication. */
+	if (ap->a_eofflag != NULL)
+		*ap->a_eofflag = idp->eofflag;
+
+	FREE(idp, M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link
+ *
+ * Only Rock Ridge volumes carry symlinks; anything else gets EINVAL.
+ * The directory record is located from the node's i_number (block
+ * number in the high bits, offset inside the block in the low bits),
+ * read through the device vnode, and the link text is extracted with
+ * cd9660_rrip_getsymname().
+ *
+ * Shouldn't we get the parent vnode and read the data from there?
+ * This could eventually result in deadlocks in cd9660_lookup.
+ * But otherwise the block read here is in the block buffer two times.
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node		    ISONODE;
+typedef struct iso_mnt		    ISOMNT;
+static int
+cd9660_readlink(struct vop_readlink_args *ap)
+{
+	ISONODE	*ip = VTOI(ap->a_vp);
+	ISOMNT	*imp = ip->i_mnt;
+	struct	uio *uio = ap->a_uio;
+	ISODIR	*dirp;
+	struct	buf *bp;
+	char	*symname;
+	u_short	symlen;
+	int	error;
+	int	kernel_dest;
+
+	if (imp->iso_ftype != ISO_FTYPE_RRIP)
+		return (EINVAL);
+
+	/*
+	 * Read the block holding this inode's directory record.
+	 */
+	error = bread(imp->im_devvp,
+		      (ip->i_number >> imp->im_bshift) <<
+		      (imp->im_bshift - DEV_BSHIFT),
+		      imp->logical_block_size, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	dirp = (ISODIR *)(bp->b_data + (ip->i_number & imp->im_bmask));
+
+	/*
+	 * Sanity check: the record must not run past the end of the block.
+	 */
+	if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length)
+	    > (unsigned)imp->logical_block_size) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	/*
+	 * A kernel-space destination is written in place; otherwise the
+	 * name is assembled in a namei buffer and copied out afterwards.
+	 */
+	kernel_dest = (uio->uio_segflg == UIO_SYSSPACE);
+	if (kernel_dest)
+		symname = uio->uio_iov->iov_base;
+	else
+		symname = uma_zalloc(namei_zone, M_WAITOK);
+
+	/*
+	 * Gather the symbolic name from the SL record.
+	 */
+	if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) {
+		if (!kernel_dest)
+			uma_zfree(namei_zone, symname);
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	/* The directory block is no longer needed. */
+	brelse(bp);
+
+	if (kernel_dest) {
+		/* Data is already in place: just account for it. */
+		uio->uio_resid -= symlen;
+		uio->uio_iov->iov_base =
+		    (char *)uio->uio_iov->iov_base + symlen;
+		uio->uio_iov->iov_len -= symlen;
+		return (0);
+	}
+
+	error = uiomove(symname, symlen, uio);
+	uma_zfree(namei_zone, symname);
+	return (error);
+}
+
+/*
+ * Calculate the logical to physical mapping if not done already,
+ * then call the device strategy routine.
+ *
+ * A buffer whose b_blkno still equals b_lblkno is taken to be unmapped
+ * and is translated using the node's iso_start.  Panics when called on
+ * a block or character device vnode.  Always returns 0.
+ */
+static int
+cd9660_strategy(struct vop_strategy_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+	struct buf *buf = ap->a_bp;
+	struct iso_node *isop = VTOI(node);
+	struct bufobj *devbo;
+
+	if (node->v_type == VBLK || node->v_type == VCHR)
+		panic("cd9660_strategy: spec");
+
+	if (buf->b_blkno == buf->b_lblkno) {
+		buf->b_blkno = (isop->iso_start + buf->b_lblkno) <<
+		    (isop->i_mnt->im_bshift - DEV_BSHIFT);
+		if ((long)buf->b_blkno == -1)	/* XXX: cut&paste junk ? */
+			clrbuf(buf);
+	}
+	if ((long)buf->b_blkno == -1) {	/* XXX: cut&paste junk ? */
+		bufdone(buf);
+		return (0);
+	}
+
+	/* Hand the request to the buffer object of the mounted device. */
+	buf->b_iooffset = dbtob(buf->b_blkno);
+	devbo = isop->i_mnt->im_bo;
+	BO_STRATEGY(devbo, buf);
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to cd9660 filesystems.
+ *
+ * The answer is stored in *ap->a_retval and 0 is returned; an unknown
+ * ap->a_name yields EINVAL and leaves *ap->a_retval untouched.
+ */
+static int
+cd9660_pathconf(struct vop_pathconf_args *ap)
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+	case _PC_CHOWN_RESTRICTED:
+	case _PC_NO_TRUNC:
+		*ap->a_retval = 1;
+		break;
+	case _PC_NAME_MAX:
+		/* Rock Ridge names may be long; plain ISO names are capped. */
+		if (VTOI(ap->a_vp)->i_mnt->iso_ftype == ISO_FTYPE_RRIP)
+			*ap->a_retval = NAME_MAX;
+		else
+			*ap->a_retval = 37;
+		break;
+	case _PC_PATH_MAX:
+		*ap->a_retval = PATH_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * Vnode pointer to File handle
+ *
+ * Fills the caller's fid (viewed as a struct ifid) with the handle
+ * length, the node's i_number and its iso_start.  Always returns 0.
+ */
+static int
+cd9660_vptofh(struct vop_vptofh_args *ap)
+{
+	struct ifid *handle = (struct ifid *)ap->a_fhp;
+	struct iso_node *isop = VTOI(ap->a_vp);
+
+	handle->ifid_len = sizeof(struct ifid);
+	handle->ifid_ino = isop->i_number;
+	handle->ifid_start = isop->iso_start;
+
+#ifdef	ISOFS_DBG
+	printf("vptofh: ino %d, start %ld\n",
+	       handle->ifid_ino, handle->ifid_start);
+#endif
+	return (0);
+}
+
+/*
+ * Global vfs data structures for cd9660
+ *
+ * Operation vector used for cd9660 vnodes.  cd9660_bmap, cd9660_lookup,
+ * cd9660_inactive and cd9660_reclaim are defined outside this file; the
+ * remaining cd9660_* entries are the static functions above.
+ * NOTE(review): operations not listed here presumably fall through to
+ * default_vnodeops via .vop_default -- confirm against vop_vector docs.
+ */
+struct vop_vector cd9660_vnodeops = {
+	.vop_default =		&default_vnodeops,
+	.vop_open =		cd9660_open,
+	.vop_access =		cd9660_access,
+	.vop_bmap =		cd9660_bmap,
+	.vop_cachedlookup =	cd9660_lookup,
+	.vop_getattr =		cd9660_getattr,
+	.vop_inactive =		cd9660_inactive,
+	.vop_ioctl =		cd9660_ioctl,
+	.vop_lookup =		vfs_cache_lookup,
+	.vop_pathconf =		cd9660_pathconf,
+	.vop_read =		cd9660_read,
+	.vop_readdir =		cd9660_readdir,
+	.vop_readlink =		cd9660_readlink,
+	.vop_reclaim =		cd9660_reclaim,
+	.vop_setattr =		cd9660_setattr,
+	.vop_strategy =		cd9660_strategy,
+	.vop_vptofh =		cd9660_vptofh,
+};
+
+/*
+ * FIFO vnode ops
+ *
+ * Operation vector for FIFO vnodes found on a cd9660 volume: the
+ * attribute, access, file-handle and lifecycle operations come from
+ * cd9660; .vop_default points at fifo_specops for everything else.
+ */
+
+struct vop_vector cd9660_fifoops = {
+	.vop_default =		&fifo_specops,
+	.vop_access =		cd9660_access,
+	.vop_getattr =		cd9660_getattr,
+	.vop_inactive =		cd9660_inactive,
+	.vop_reclaim =		cd9660_reclaim,
+	.vop_setattr =		cd9660_setattr,
+	.vop_vptofh =		cd9660_vptofh,
+};
--- /dev/null
+++ sys/fs/cd9660/cd9660_node.c
@@ -0,0 +1,332 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_node.c,v 1.56 2007/02/11 13:54:25 rodrigc Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+#include <sys/mutex.h>
+
+#include <fs/cd9660/iso.h>
+#include <fs/cd9660/cd9660_node.h>
+#include <fs/cd9660/cd9660_mount.h>
+
+static unsigned	cd9660_chars2ui(unsigned char *begin, int len);
+
+/*
+ * Last reference to an inode has gone away.
+ *
+ * Clears the in-core flag word and, when the node has no mode set
+ * (iso_mode == 0), asks for the vnode to be recycled so that it can be
+ * reused immediately.  Always returns 0.
+ */
+int
+cd9660_inactive(struct vop_inactive_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+	struct iso_node *isop = VTOI(node);
+
+	if (prtactive && vrefcnt(node) != 0)
+		vprint("cd9660_inactive: pushing active", node);
+
+	isop->i_flag = 0;
+
+	if (isop->inode.iso_mode == 0)
+		vrecycle(node, ap->a_td);
+	return (0);
+}
+
+/*
+ * Reclaim an inode so that it can be used for other purposes.
+ *
+ * Tears down, in order: the vnode's VM object, its vfs hash entry, the
+ * reference on the device vnode, and finally the iso_node itself.
+ * Always returns 0.
+ */
+int
+cd9660_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *node = ap->a_vp;
+	struct iso_node *isop = VTOI(node);
+
+	if (prtactive && vrefcnt(node) != 0)
+		vprint("cd9660_reclaim: pushing active", node);
+
+	/* Destroy the vm object and flush associated pages. */
+	vnode_destroy_vobject(node);
+
+	/* Remove the inode from its hash chain. */
+	vfs_hash_remove(node);
+
+	/* Drop the device vnode reference, then free the iso_node. */
+	if (isop->i_mnt->im_devvp != NULL)
+		vrele(isop->i_mnt->im_devvp);
+	FREE(node->v_data, M_ISOFSNODE);
+	node->v_data = NULL;
+	return (0);
+}
+
+/*
+ * File attributes
+ *
+ * Sets inop's mode, link count, uid and gid from a plain directory
+ * record.  The file type comes from the record's flags byte.  The
+ * permission bits and ids come from a version-1 extended attribute
+ * record when one is available (passed in `bp', or read here when the
+ * mount has ISOFSMNT_EXTATT set and the record has a non-zero extended
+ * attribute length); otherwise read+execute for everyone and uid/gid 0
+ * are used.
+ */
+void
+cd9660_defattr(struct iso_directory_record *isodir, struct iso_node *inop,
+    struct buf *bp, enum ISO_FTYPE ftype)
+{
+	struct iso_extended_attributes *xattr = NULL;
+	struct buf *own_bp = NULL;
+	struct iso_mnt *imp;
+	int off;
+
+	/* high sierra does not have timezone data, flag is one byte ahead */
+	if (isonum_711(ftype == ISO_FTYPE_HIGH_SIERRA?
+		       &isodir->date[6]: isodir->flags)&2)
+		inop->inode.iso_mode = S_IFDIR;
+	else
+		inop->inode.iso_mode = S_IFREG;
+	/*
+	 * Directories report one link as well: if we returned 2, fts()
+	 * would assume there are no subdirectories (just links for the
+	 * path and .).
+	 */
+	inop->inode.iso_links = 1;
+
+	/* No attribute buffer supplied: try to fetch one ourselves. */
+	if (bp == NULL) {
+		imp = inop->i_mnt;
+		if ((imp->im_flags & ISOFSMNT_EXTATT) != 0) {
+			off = isonum_711(isodir->ext_attr_length);
+			if (off != 0) {
+				cd9660_blkatoff(ITOV(inop),
+				    (off_t)-(off << imp->im_bshift), NULL,
+				    &own_bp);
+				bp = own_bp;
+			}
+		}
+	}
+
+	if (bp != NULL) {
+		xattr = (struct iso_extended_attributes *)bp->b_data;
+
+		if (isonum_711(xattr->version) != 1) {
+			/* Unknown layout: fall back to the defaults. */
+			xattr = NULL;
+		} else {
+			/* A clear bit in perm[] grants the permission. */
+			if (!(xattr->perm[0]&0x40))
+				inop->inode.iso_mode |= VEXEC >> 6;
+			if (!(xattr->perm[0]&0x10))
+				inop->inode.iso_mode |= VREAD >> 6;
+			if (!(xattr->perm[0]&4))
+				inop->inode.iso_mode |= VEXEC >> 3;
+			if (!(xattr->perm[0]&1))
+				inop->inode.iso_mode |= VREAD >> 3;
+			if (!(xattr->perm[1]&0x40))
+				inop->inode.iso_mode |= VEXEC;
+			if (!(xattr->perm[1]&0x10))
+				inop->inode.iso_mode |= VREAD;
+			inop->inode.iso_uid = isonum_723(xattr->owner); /* what about 0? */
+			inop->inode.iso_gid = isonum_723(xattr->group); /* what about 0? */
+		}
+	}
+
+	if (xattr == NULL) {
+		inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6;
+		inop->inode.iso_uid = (uid_t)0;
+		inop->inode.iso_gid = (gid_t)0;
+	}
+
+	if (own_bp != NULL)
+		brelse(own_bp);
+}
+
+/*
+ * Time stamps
+ *
+ * Sets inop's access, change and modification times.  On non High
+ * Sierra volumes a version-1 extended attribute record (passed in `bp',
+ * or read here when the mount has ISOFSMNT_EXTATT set and the record
+ * has a non-zero extended attribute length) supplies the 17-byte
+ * timestamps.  Otherwise all three times are taken from the 7-byte date
+ * in the directory record.
+ */
+void
+cd9660_deftstamp(struct iso_directory_record *isodir, struct iso_node *inop,
+    struct buf *bp, enum ISO_FTYPE ftype)
+{
+	struct iso_extended_attributes *xattr = NULL;
+	struct buf *own_bp = NULL;
+	struct iso_mnt *imp;
+	int off;
+
+	/* No attribute buffer supplied: try to fetch one ourselves. */
+	if (bp == NULL) {
+		imp = inop->i_mnt;
+		if ((imp->im_flags & ISOFSMNT_EXTATT) != 0) {
+			off = isonum_711(isodir->ext_attr_length);
+			if (off != 0) {
+				cd9660_blkatoff(ITOV(inop),
+				    (off_t)-(off << imp->im_bshift), NULL,
+				    &own_bp);
+				bp = own_bp;
+			}
+		}
+	}
+
+	if (bp != NULL) {
+		xattr = (struct iso_extended_attributes *)bp->b_data;
+
+		if (ftype == ISO_FTYPE_HIGH_SIERRA ||
+		    isonum_711(xattr->version) != 1) {
+			xattr = NULL;
+		} else {
+			/* Each time falls back to another when unusable. */
+			if (!cd9660_tstamp_conv17(xattr->ftime,
+			    &inop->inode.iso_atime))
+				cd9660_tstamp_conv17(xattr->ctime,
+				    &inop->inode.iso_atime);
+			if (!cd9660_tstamp_conv17(xattr->ctime,
+			    &inop->inode.iso_ctime))
+				inop->inode.iso_ctime = inop->inode.iso_atime;
+			if (!cd9660_tstamp_conv17(xattr->mtime,
+			    &inop->inode.iso_mtime))
+				inop->inode.iso_mtime = inop->inode.iso_ctime;
+		}
+	}
+
+	if (xattr == NULL) {
+		cd9660_tstamp_conv7(isodir->date, &inop->inode.iso_ctime,
+		    ftype);
+		inop->inode.iso_atime = inop->inode.iso_ctime;
+		inop->inode.iso_mtime = inop->inode.iso_ctime;
+	}
+
+	if (own_bp != NULL)
+		brelse(own_bp);
+}
+
+/*
+ * Convert a 7-byte directory-record timestamp to a struct timespec.
+ *
+ *   pi[0]     years since 1900
+ *   pi[1..5]  month, day, hour, minute, second
+ *   pi[6]     offset from GMT in 15-minute units, as a signed byte
+ *             (ignored for ISO_FTYPE_HIGH_SIERRA, which has no such byte)
+ *
+ * Returns 1 with *pu set to the converted time.  Dates before 1970
+ * cannot be represented: *pu is zeroed and 0 is returned.
+ */
+int
+cd9660_tstamp_conv7(u_char *pi, struct timespec *pu, enum ISO_FTYPE ftype)
+{
+	int crtime, days;
+	int y, m, d, hour, minute, second, tz;
+
+	y = pi[0] + 1900;
+	m = pi[1];
+	d = pi[2];
+	hour = pi[3];
+	minute = pi[4];
+	second = pi[5];
+	if(ftype != ISO_FTYPE_HIGH_SIERRA) {
+		/*
+		 * The timezone value is signed.  Reading it as an unsigned
+		 * byte would turn every negative offset into a value above
+		 * 52, which the range check below silently discards.
+		 */
+		tz = ((signed char *)pi)[6];
+	} else {
+		/* original high sierra misses timezone data */
+		tz = 0;
+	}
+
+	if (y < 1970) {
+		pu->tv_sec  = 0;
+		pu->tv_nsec = 0;
+		return 0;
+	} else {
+#ifdef	ORIGINAL
+		/* computes day number relative to Sept. 19th,1989 */
+		/* don't even *THINK* about changing formula. It works! */
+		days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+		/*
+		 * Changed :-) to make it relative to Jan. 1st, 1970
+		 * and to disambiguate negative division
+		 */
+		days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+		crtime = ((((days * 24) + hour) * 60 + minute) * 60) + second;
+
+		/* timezone offset is unreliable on some disks */
+		if (-48 <= tz && tz <= 52)
+			crtime -= tz * 15 * 60;
+	}
+	pu->tv_sec  = crtime;
+	pu->tv_nsec = 0;
+	return 1;
+}
+
+/*
+ * Convert `len' ASCII decimal digits starting at `begin' to an unsigned
+ * integer.  No validation is done on the characters; a non-positive
+ * `len' yields 0.
+ */
+static u_int
+cd9660_chars2ui(u_char *begin, int len)
+{
+	u_int value = 0;
+	int i;
+
+	for (i = 0; i < len; i++)
+		value = value * 10 + (begin[i] - '0');
+	return (value);
+}
+
+/*
+ * Convert a 17-byte ASCII timestamp ("YYYYMMDDhhmmss" followed by two
+ * digits that are ignored here and a GMT offset byte at index 16) to a
+ * struct timespec by repacking it into the 7-byte binary form and
+ * calling cd9660_tstamp_conv7().  Returns that function's result.
+ */
+int
+cd9660_tstamp_conv17(u_char *pi, struct timespec *pu)
+{
+	u_char packed[7];
+	int field;
+
+	/* year: "0001"-"9999", stored relative to 1900 */
+	packed[0] = cd9660_chars2ui(pi, 4) - 1900;
+
+	/*
+	 * month, day, hour, minute, second: five two-digit fields that
+	 * start right after the year, at offsets 4, 6, 8, 10 and 12.
+	 */
+	for (field = 1; field <= 5; field++)
+		packed[field] = cd9660_chars2ui(pi + 2 + 2 * field, 2);
+
+	/* difference of GMT */
+	packed[6] = pi[16];
+
+	return (cd9660_tstamp_conv7(packed, pu, ISO_FTYPE_DEFAULT));
+}
+
+/*
+ * Compute the inode number used for a directory: the sum of the
+ * record's extent and extended attribute length, shifted left by the
+ * mount's im_bshift.
+ */
+ino_t
+isodirino(struct iso_directory_record *isodir, struct iso_mnt *imp)
+{
+
+	return ((isonum_733(isodir->extent) +
+	    isonum_711(isodir->ext_attr_length)) << imp->im_bshift);
+}
--- /dev/null
+++ sys/fs/cd9660/iso.h
@@ -0,0 +1,350 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso.h	8.6 (Berkeley) 5/10/95
+ * $FreeBSD: src/sys/fs/cd9660/iso.h,v 1.33 2007/02/15 22:08:34 pjd Exp $
+ */
+
+#define ISODCL(from, to) ((to) - (from) + 1)	/* byte length of the 1-based inclusive range [from, to]; arguments parenthesised so expressions expand correctly */
+
+struct iso_volume_descriptor {	/* generic 2048-byte volume descriptor; ISODCL() takes 1-based byte positions */
+	char type[ISODCL(1,1)]; /* 711 */
+	char id[ISODCL(2,6)];	/* 5 bytes -- presumably matched against ISO_STANDARD_ID / ISO_ECMA_ID; confirm in mount code */
+	char version[ISODCL(7,7)];
+	char unused[ISODCL(8,8)];
+	char type_sierra[ISODCL(9,9)]; /* 711 */
+	char id_sierra[ISODCL(10,14)];	/* 5 bytes -- presumably matched against ISO_SIERRA_ID; confirm in mount code */
+	char version_sierra[ISODCL(15,15)];
+	char data[ISODCL(16,2048)];	/* rest of the descriptor, not broken down in this view */
+};
+
+/* volume descriptor types (values for the one-byte "type" field) */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_SUPPLEMENTARY 2
+#define ISO_VD_END 255
+
+#define ISO_STANDARD_ID "CD001"	/* 5 characters, the width of the descriptor "id" fields */
+#define ISO_ECMA_ID	"CDW01"	/* 5 characters */
+
+#define ISO_SIERRA_ID	"CDROM"	/* 5 characters, the width of "id_sierra" */
+
+struct iso_primary_descriptor {	/* 2048-byte primary volume descriptor; the 7xy tags name the isonum_7xy() decoder for a field */
+	char type			[ISODCL (  1,	1)]; /* 711 */
+	char id				[ISODCL (  2,	6)];
+	char version			[ISODCL (  7,	7)]; /* 711 */
+	char unused1			[ISODCL (  8,	8)]; /* "flags" in iso_supplementary_descriptor */
+	char system_id			[ISODCL (  9,  40)]; /* achars */
+	char volume_id			[ISODCL ( 41,  72)]; /* dchars */
+	char unused2			[ISODCL ( 73,  80)];
+	char volume_space_size		[ISODCL ( 81,  88)]; /* 733 */
+	char unused3			[ISODCL ( 89, 120)]; /* "escape" in iso_supplementary_descriptor */
+	char volume_set_size		[ISODCL (121, 124)]; /* 723 */
+	char volume_sequence_number	[ISODCL (125, 128)]; /* 723 */
+	char logical_block_size		[ISODCL (129, 132)]; /* 723 */
+	char path_table_size		[ISODCL (133, 140)]; /* 733 */
+	char type_l_path_table		[ISODCL (141, 144)]; /* 731 */
+	char opt_type_l_path_table	[ISODCL (145, 148)]; /* 731 */
+	char type_m_path_table		[ISODCL (149, 152)]; /* 732 */
+	char opt_type_m_path_table	[ISODCL (153, 156)]; /* 732 */
+	char root_directory_record	[ISODCL (157, 190)]; /* 9.1 */
+	char volume_set_id		[ISODCL (191, 318)]; /* dchars */
+	char publisher_id		[ISODCL (319, 446)]; /* achars */
+	char preparer_id		[ISODCL (447, 574)]; /* achars */
+	char application_id		[ISODCL (575, 702)]; /* achars */
+	char copyright_file_id		[ISODCL (703, 739)]; /* 7.5 dchars */
+	char abstract_file_id		[ISODCL (740, 776)]; /* 7.5 dchars */
+	char bibliographic_file_id	[ISODCL (777, 813)]; /* 7.5 dchars */
+	char creation_date		[ISODCL (814, 830)]; /* 8.4.26.1 */
+	char modification_date		[ISODCL (831, 847)]; /* 8.4.26.1 */
+	char expiration_date		[ISODCL (848, 864)]; /* 8.4.26.1 */
+	char effective_date		[ISODCL (865, 881)]; /* 8.4.26.1 */
+	char file_structure_version	[ISODCL (882, 882)]; /* 711 */
+	char unused4			[ISODCL (883, 883)];
+	char application_data		[ISODCL (884, 1395)]; /* 512 bytes, no structure imposed here */
+	char unused5			[ISODCL (1396, 2048)];
+};
+#define ISO_DEFAULT_BLOCK_SIZE		2048
+
+/*
+ * Used by Microsoft Joliet extension to ISO9660. Almost the same
+ * as PVD, but byte position 8 is a flag, and 89-120 is for escape.
+ */
+
+struct iso_supplementary_descriptor {	/* same byte layout as iso_primary_descriptor except "flags" (8) and "escape" (89-120) */
+      char type                       [ISODCL (  1,   1)]; /* 711 */
+      char id                         [ISODCL (  2,   6)];
+      char version                    [ISODCL (  7,   7)]; /* 711 */
+      char flags                      [ISODCL (  8,   8)]; /* 711? */
+      char system_id                  [ISODCL (  9,  40)]; /* achars */
+      char volume_id                  [ISODCL ( 41,  72)]; /* dchars */
+      char unused2                    [ISODCL ( 73,  80)];
+      char volume_space_size          [ISODCL ( 81,  88)]; /* 733 */
+      char escape                     [ISODCL ( 89, 120)]; /* 32 bytes; "unused3" in the primary descriptor */
+      char volume_set_size            [ISODCL (121, 124)]; /* 723 */
+      char volume_sequence_number     [ISODCL (125, 128)]; /* 723 */
+      char logical_block_size         [ISODCL (129, 132)]; /* 723 */
+      char path_table_size            [ISODCL (133, 140)]; /* 733 */
+      char type_l_path_table          [ISODCL (141, 144)]; /* 731 */
+      char opt_type_l_path_table      [ISODCL (145, 148)]; /* 731 */
+      char type_m_path_table          [ISODCL (149, 152)]; /* 732 */
+      char opt_type_m_path_table      [ISODCL (153, 156)]; /* 732 */
+      char root_directory_record      [ISODCL (157, 190)]; /* 9.1 */
+      char volume_set_id              [ISODCL (191, 318)]; /* dchars */
+      char publisher_id               [ISODCL (319, 446)]; /* achars */
+      char preparer_id                [ISODCL (447, 574)]; /* achars */
+      char application_id             [ISODCL (575, 702)]; /* achars */
+      char copyright_file_id          [ISODCL (703, 739)]; /* 7.5 dchars */
+      char abstract_file_id           [ISODCL (740, 776)]; /* 7.5 dchars */
+      char bibliographic_file_id      [ISODCL (777, 813)]; /* 7.5 dchars */
+      char creation_date              [ISODCL (814, 830)]; /* 8.4.26.1 */
+      char modification_date          [ISODCL (831, 847)]; /* 8.4.26.1 */
+      char expiration_date            [ISODCL (848, 864)]; /* 8.4.26.1 */
+      char effective_date             [ISODCL (865, 881)]; /* 8.4.26.1 */
+      char file_structure_version     [ISODCL (882, 882)]; /* 711 */
+      char unused4                    [ISODCL (883, 883)];
+      char application_data           [ISODCL (884, 1395)];
+      char unused5                    [ISODCL (1396, 2048)];
+};
+
+struct iso_sierra_primary_descriptor {	/* High Sierra variant: 8-byte prefix, extra unknownN path-table slots, 16-byte dates */
+	char unknown1			[ISODCL (  1,	8)]; /* 733 */
+	char type			[ISODCL (  9,	9)]; /* 711 */
+	char id				[ISODCL ( 10,  14)];
+	char version			[ISODCL ( 15,  15)]; /* 711 */
+	char unused1			[ISODCL ( 16,  16)];
+	char system_id			[ISODCL ( 17,  48)]; /* achars */
+	char volume_id			[ISODCL ( 49,  80)]; /* dchars */
+	char unused2			[ISODCL ( 81,  88)];
+	char volume_space_size		[ISODCL ( 89,  96)]; /* 733 */
+	char unused3			[ISODCL ( 97, 128)];
+	char volume_set_size		[ISODCL (129, 132)]; /* 723 */
+	char volume_sequence_number	[ISODCL (133, 136)]; /* 723 */
+	char logical_block_size		[ISODCL (137, 140)]; /* 723 */
+	char path_table_size		[ISODCL (141, 148)]; /* 733 */
+	char type_l_path_table		[ISODCL (149, 152)]; /* 731 */
+	char opt_type_l_path_table	[ISODCL (153, 156)]; /* 731 */
+	char unknown2			[ISODCL (157, 160)]; /* 731 */
+	char unknown3			[ISODCL (161, 164)]; /* 731 */
+	char type_m_path_table		[ISODCL (165, 168)]; /* 732 */
+	char opt_type_m_path_table	[ISODCL (169, 172)]; /* 732 */
+	char unknown4			[ISODCL (173, 176)]; /* 732 */
+	char unknown5			[ISODCL (177, 180)]; /* 732 */
+	char root_directory_record	[ISODCL (181, 214)]; /* 9.1 */
+	char volume_set_id		[ISODCL (215, 342)]; /* dchars */
+	char publisher_id		[ISODCL (343, 470)]; /* achars */
+	char preparer_id		[ISODCL (471, 598)]; /* achars */
+	char application_id		[ISODCL (599, 726)]; /* achars */
+	char copyright_id		[ISODCL (727, 790)]; /* achars */
+	char creation_date		[ISODCL (791, 806)]; /* ? (16 bytes, one fewer than the ISO form) */
+	char modification_date		[ISODCL (807, 822)]; /* ? */
+	char expiration_date		[ISODCL (823, 838)]; /* ? */
+	char effective_date		[ISODCL (839, 854)]; /* ? */
+	char file_structure_version	[ISODCL (855, 855)]; /* 711 */
+	char unused4			[ISODCL (856, 2048)];
+};
+
+struct iso_directory_record {	/* fixed 33-byte head of an on-disc directory entry, followed by the name */
+	char length			[ISODCL (1, 1)]; /* 711 */
+	char ext_attr_length		[ISODCL (2, 2)]; /* 711 */
+	u_char extent			[ISODCL (3, 10)]; /* 733 */
+	u_char size			[ISODCL (11, 18)]; /* 733 */
+	char date			[ISODCL (19, 25)]; /* 7 by 711 */
+	char flags			[ISODCL (26, 26)]; /* cd9660_lookup() tests bits 2 and 4; on High Sierra it reads date[6] instead */
+	char file_unit_size		[ISODCL (27, 27)]; /* 711 */
+	char interleave			[ISODCL (28, 28)]; /* 711 */
+	char volume_sequence_number	[ISODCL (29, 32)]; /* 723 */
+	char name_len			[ISODCL (33, 33)]; /* 711 */
+	char name			[1];			/* XXX placeholder: callers read name_len bytes starting here */
+};
+/* can't take sizeof(iso_directory_record), because of possible alignment
+   of the last entry (34 instead of 33) */
+#define ISO_DIRECTORY_RECORD_SIZE	33
+
+struct iso_extended_attributes {	/* 250 bytes; ISODCL() takes 1-based byte positions */
+	u_char owner			[ISODCL (1, 4)]; /* 723 */
+	u_char group			[ISODCL (5, 8)]; /* 723 */
+	u_char perm			[ISODCL (9, 10)]; /* 9.5.3 */
+	char ctime			[ISODCL (11, 27)]; /* 8.4.26.1 */
+	char mtime			[ISODCL (28, 44)]; /* 8.4.26.1 */
+	char xtime			[ISODCL (45, 61)]; /* 8.4.26.1 */
+	char ftime			[ISODCL (62, 78)]; /* 8.4.26.1 */
+	char recfmt			[ISODCL (79, 79)]; /* 711 */
+	char recattr			[ISODCL (80, 80)]; /* 711 */
+	u_char reclen			[ISODCL (81, 84)]; /* 723 */
+	char system_id			[ISODCL (85, 116)]; /* achars */
+	char system_use			[ISODCL (117, 180)]; /* 64 bytes, no structure imposed here */
+	char version			[ISODCL (181, 181)]; /* 711 */
+	char len_esc			[ISODCL (182, 182)]; /* 711 */
+	char reserved			[ISODCL (183, 246)]; /* 64 bytes */
+	u_char len_au			[ISODCL (247, 250)]; /* 723 */
+};
+
+#ifdef _KERNEL
+
+/* CD-ROM format type; kept per mount in iso_mnt.iso_ftype (DEFAULT is 0) */
+enum ISO_FTYPE	{ ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP,
+		  ISO_FTYPE_JOLIET, ISO_FTYPE_ECMA, ISO_FTYPE_HIGH_SIERRA };
+
+#ifndef	ISOFSMNT_ROOT
+#define	ISOFSMNT_ROOT	0
+#endif
+
+struct iso_mnt {	/* per-mount state; reached from a struct mount via VFSTOISOFS() */
+	int im_flags;	/* handed to isofncmp() as "flags" (tested there against ISOFSMNT_KICONV) */
+
+	struct mount *im_mountp;
+	struct cdev *im_dev;
+	struct vnode *im_devvp;
+
+	struct g_consumer *im_cp;
+	struct bufobj *im_bo;
+
+	int logical_block_size;	/* value of blksize() */
+	int im_bshift;	/* shift count used by lblkno() / lblktosize() */
+	int im_bmask;	/* mask used by blkoff() */
+
+	int volume_space_size;
+
+	char root[ISODCL (157, 190)];	/* same byte range as the descriptor's root_directory_record */
+	int root_extent;
+	int root_size;
+	enum ISO_FTYPE	iso_ftype;
+
+	int rr_skip;
+	int rr_skip0;
+
+	int joliet_level;	/* 0 makes isochar() treat names as one byte per character */
+
+	void *im_d2l;	/* passed by cd9660_lookup() as isofncmp()'s "handle" (disc-name side) */
+	void *im_l2d;	/* passed by cd9660_lookup() as isofncmp()'s "lhandle" (lookup-name side) */
+};
+
+struct ifid {	/* NOTE(review): presumably this filesystem's file-handle layout; no use is visible here -- confirm */
+	u_short	ifid_len;
+	u_short	ifid_pad;
+	int	ifid_ino;
+	long	ifid_start;
+};
+
+#define VFSTOISOFS(mp)	((struct iso_mnt *)((mp)->mnt_data))	/* mount -> its iso_mnt */
+
+#define blkoff(imp, loc)	((loc) & (imp)->im_bmask)	/* byte offset of loc inside its logical block */
+#define lblktosize(imp, blk)	((blk) << (imp)->im_bshift)	/* logical block number -> byte offset */
+#define lblkno(imp, loc)	((loc) >> (imp)->im_bshift)	/* byte offset -> logical block number */
+#define blksize(imp, ip, lbn)	((imp)->logical_block_size)	/* ip and lbn are ignored */
+
+int cd9660_vget_internal(struct mount *, ino_t, int, struct vnode **, int,
+			 struct iso_directory_record *);
+#define cd9660_sysctl ((int (*)(int *, u_int, void *, size_t *, void *, \
+				size_t, struct proc *))eopnotsupp)	/* no own handler: eopnotsupp cast to the handler type */
+
+extern struct vop_vector cd9660_vnodeops;
+extern struct vop_vector cd9660_fifoops;
+
+int isochar(u_char *, u_char *, int, u_short *, int *, int, void *);	/* (isofn, isoend, joliet_level, c, clen, flags, handle) */
+int isofncmp(u_char *, int, u_char *, int, int, int, void *, void *);	/* (fn, fnlen, isofn, isolen, joliet_level, flags, handle, lhandle) */
+void isofntrans(u_char *, int, u_char *, u_short *, int, int, int, int, void *);	/* (infn, infnlen, outfn, outfnlen, original, assoc, joliet_level, flags, handle) */
+ino_t isodirino(struct iso_directory_record *, struct iso_mnt *);	/* (isodir, imp) */
+u_short sgetrune(const char *, size_t, char const **, int, void *);	/* (string, n, result, flags, handle) */
+
+#endif /* _KERNEL */
+
+/*
+ * The isonum_xxx functions are inlined anyway, and could come handy even
+ * outside the kernel.  Thus we don't hide them here.
+ */
+
+/*
+ * 7xy
+ *  x -> 1 = 8 bits, 2 = 16 bits, 3 = 32 bits
+ *   y -> 1 = little-endian, 2 = big-endian, 3 = both (le then be)
+ */
+
+static __inline uint8_t
+isonum_711(unsigned char *p)	/* 8-bit field: the value is the byte itself */
+{
+	return (*p);
+}
+
+static __inline uint8_t
+isonum_712(unsigned char *p)	/* 8-bit field: returned as an unsigned byte */
+{
+	return (*p);
+}
+
+static __inline uint8_t
+isonum_713(unsigned char *p)	/* 8-bit field: only the first byte is read */
+{
+	return (*p);
+}
+
+static __inline uint16_t
+isonum_721(unsigned char *p)	/* 16 bits, least significant byte first */
+{
+	return ((p[1] << 8) | p[0]);
+}
+
+static __inline uint16_t
+isonum_722(unsigned char *p)	/* 16 bits, most significant byte first */
+{
+	return ((p[0] << 8) | p[1]);
+}
+
+static __inline uint16_t
+isonum_723(unsigned char *p)	/* 16 bits recorded in both orders; only the leading LSB-first copy is read */
+{
+	return ((p[1] << 8) | p[0]);
+}
+
+static __inline uint32_t	/* 32 bits, least significant byte first */
+isonum_731(unsigned char *p)	/* p[3] is widened first: an int shifted << 24 is undefined for bytes >= 0x80 */
+{
+	return (p[0] | p[1] << 8 | p[2] << 16 | (uint32_t)p[3] << 24);
+}
+
+static __inline uint32_t	/* 32 bits, most significant byte first */
+isonum_732(unsigned char *p)	/* p[0] is widened first: an int shifted << 24 is undefined for bytes >= 0x80 */
+{
+	return (p[3] | p[2] << 8 | p[1] << 16 | (uint32_t)p[0] << 24);
+}
+
+static __inline uint32_t	/* 32 bits recorded in both orders; only the leading LSB-first copy (p[0..3]) is read */
+isonum_733(unsigned char *p)	/* p[3] is widened first: an int shifted << 24 is undefined for bytes >= 0x80 */
+{
+	return (p[0] | p[1] << 8 | p[2] << 16 | (uint32_t)p[3] << 24);
+}
+
+/*
+ * Associated files have a leading '='.
+ */
+#define	ASSOCCHAR	'='
--- /dev/null
+++ sys/fs/cd9660/cd9660_util.c
@@ -0,0 +1,243 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp). Joliet support was added by
+ * Joachim Kuebart (joki at kuebart.stuttgart.netsurf.de).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_util.c	8.3 (Berkeley) 12/5/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_util.c,v 1.23 2007/02/11 13:54:25 rodrigc Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/iconv.h>
+
+#include <fs/cd9660/iso.h>
+#include <fs/cd9660/cd9660_mount.h>
+
+extern struct iconv_functions *cd9660_iconv;
+
+/*
+ * Decode one character of an ISO file name at isofn (bounded by isoend) into
+ * *c; if clen is non-NULL, *clen gets the byte count of the value put in *c.
+ * Returns the input bytes consumed: 1 (non-Joliet or last byte), else 2.
+ */
+int
+isochar(isofn, isoend, joliet_level, c, clen, flags, handle)
+      u_char *isofn;	/* current position in the on-disc name */
+      u_char *isoend;	/* one past the last byte of the name */
+      int joliet_level;	/* 0: one byte per character */
+      u_short *c;	/* out: decoded character */
+      int *clen;	/* out, may be NULL */
+      int flags;	/* ISOFSMNT_KICONV selects the iconv path */
+      void *handle;	/* passed through to cd9660_iconv->convchr() */
+{
+      size_t i, j, len;
+      char inbuf[3], outbuf[3], *inp, *outp;
+
+      *c = *isofn++;	/* default result: the first byte */
+      if (clen) *clen = 1;
+      if (joliet_level == 0 || isofn == isoend)
+              /* (00) and (01) are one byte in Joliet, too */
+              return 1;
+
+      if (flags & ISOFSMNT_KICONV && cd9660_iconv) {
+              i = j = len = 2;	/* two bytes in, two bytes of room out */
+              inbuf[0]=(char)*(isofn - 1);
+              inbuf[1]=(char)*isofn;
+              inbuf[2]='\0';
+              inp = inbuf;
+              outp = outbuf;
+              cd9660_iconv->convchr(handle, (const char **)&inp, &i, &outp, &j);
+              len -= j;	/* presumably j is the output room left, making len the bytes produced -- confirm convchr contract */
+              if (clen) *clen = len;
+              *c = '\0';
+              while(len--)	/* fold the len bytes just before outp into *c, earlier byte in the higher bits */
+                      *c |= (*(outp - len - 1) & 0xff) << (len << 3);
+      } else {
+              switch (*c) {	/* no conversion: look at the first byte of the pair */
+              default:
+                      *c = '?';	/* first byte non-zero: substitute '?' */
+                      break;
+              case '\0':
+                      *c = *isofn;	/* first byte zero: the character is the second byte */
+                      break;
+              }
+      }
+
+      return 2;
+}
+
+/*
+ * Compare name fn[0..fnlen) against on-disc name isofn[0..isolen); returns 0
+ * when they are taken to match, else a difference value.  A disc 'A'-'Z' also
+ * matches fn's 'a'-'z', and a trailing ";version" (or ".;version") on disc
+ * may be absent from fn.
+ */
+int
+isofncmp(fn, fnlen, isofn, isolen, joliet_level, flags, handle, lhandle)
+	u_char *fn;		/* name being looked up */
+	int fnlen;
+	u_char *isofn;		/* name from the directory record */
+	int isolen;
+	int joliet_level;	/* forwarded to isochar() */
+	int flags;		/* forwarded to isochar() and sgetrune() */
+	void *handle;		/* forwarded to isochar() */
+	void *lhandle;		/* forwarded to sgetrune() */
+{
+	int i, j;
+	u_short c, d;	/* c: character from the disc name, d: character from fn */
+	u_char *fnend = fn + fnlen, *isoend = isofn + isolen;
+
+	for (; fn < fnend; ) {
+		d = sgetrune(fn, fnend - fn, (char const **)&fn, flags, lhandle);	/* also advances fn */
+		if (isofn == isoend)
+			return d;	/* disc name ended first */
+		isofn += isochar(isofn, isoend, joliet_level, &c, NULL, flags, handle);
+		if (c == ';') {	/* version separator on disc */
+			if (d != ';')
+				return d;
+			for (i = 0; fn < fnend; i = i * 10 + *fn++ - '0') {	/* rest of fn as a decimal number */
+				if (*fn < '0' || *fn > '9') {
+					return -1;
+				}
+			}
+			for (j = 0; isofn != isoend; j = j * 10 + c - '0')	/* rest of the disc name likewise; digits are not checked */
+				isofn += isochar(isofn, isoend,
+						 joliet_level, &c,
+						 NULL, flags, handle);
+			return i - j;
+		}
+		if (c != d) {
+			if (c >= 'A' && c <= 'Z') {
+				if (c + ('a' - 'A') != d) {	/* not the lower-case form either */
+					if (d >= 'a' && d <= 'z')
+						return d - ('a' - 'A') - c;	/* compare as upper case */
+					else
+						return d - c;
+				}
+			} else
+				return d - c;
+		}
+	}
+	if (isofn != isoend) {	/* fn is used up but the disc name has more */
+		isofn += isochar(isofn, isoend, joliet_level, &c, NULL, flags, handle);
+		switch (c) {
+		default:
+			return -c;
+		case '.':	/* a bare trailing '.' counts only when ';' follows it */
+			if (isofn != isoend) {
+				isochar(isofn, isoend, joliet_level, &c,
+					NULL, flags, handle);
+				if (c == ';')
+					return 0;
+			}
+			return -1;
+		case ';':	/* only the version is left */
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * translate a filename of length > 0
+ */
+void
+isofntrans(infn, infnlen, outfn, outfnlen, original, assoc, joliet_level, flags, handle)
+	u_char *infn;		/* on-disc name */
+	int infnlen;		/* its length in bytes */
+	u_char *outfn;		/* destination; no size is passed in or checked */
+	u_short *outfnlen;	/* out: number of bytes stored in outfn */
+	int original;		/* non-zero: no case folding, keep ";version" */
+	int assoc;		/* non-zero: emit ASSOCCHAR first */
+	int joliet_level;	/* forwarded to isochar(); non-zero also disables folding */
+	int flags;		/* forwarded to isochar() */
+	void *handle;		/* forwarded to isochar() */
+{
+	u_short cur, prev = '\0';
+	u_char *dst = outfn;
+	u_char *stop = infn + infnlen;
+	int nbytes;
+
+	if (assoc)
+		*dst++ = ASSOCCHAR;
+	while (infn != stop) {
+		infn += isochar(infn, stop, joliet_level, &cur, &nbytes, flags, handle);
+		if (!original && cur == ';') {	/* version separator ends the name */
+			if (prev == '.')	/* and takes a '.' written just before it along */
+				dst--;
+			break;
+		}
+		if (!original && !joliet_level && cur >= 'A' && cur <= 'Z')
+			cur += ('a' - 'A');
+		prev = cur;
+		for (; nbytes != 0; nbytes--)	/* most significant byte first */
+			*dst++ = cur >> ((nbytes - 1) << 3);
+	}
+	*outfnlen = dst - outfn;
+}
+
+/*
+ * Fetch one character from string (n bytes available); *result gets the new position.
+ */
+u_short
+sgetrune(string, n, result, flags, handle)
+	const char *string;	/* input position */
+	size_t n;		/* bytes available at string */
+	char const **result;	/* out, may be NULL: position after the character */
+	int flags;		/* ISOFSMNT_KICONV selects the iconv path */
+	void *handle;		/* passed through to cd9660_iconv->convchr() */
+{
+	size_t i, j, len;
+	char outbuf[3], *outp;
+	u_short c = '\0';
+
+	len = i = (n < 2) ? n : 2;	/* offer at most two input bytes */
+	j = 2;
+	outp = outbuf;
+
+	if (flags & ISOFSMNT_KICONV && cd9660_iconv) {
+		cd9660_iconv->convchr(handle, (const char **)&string,
+			&i, &outp, &j);
+		len -= i;	/* presumably i is the input left over, making len the bytes consumed -- confirm convchr contract */
+	} else {
+		len = 1;	/* no conversion: exactly one byte per character */
+		string++;
+	}
+
+	if (result) *result = string;
+	while(len--) c |= (*(string - len - 1) & 0xff) << (len << 3);	/* pack the len bytes before string, earlier byte in the higher bits */
+	return (c);
+}
--- /dev/null
+++ sys/fs/cd9660/cd9660_lookup.c
@@ -0,0 +1,430 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)ufs_lookup.c	7.33 (Berkeley) 5/19/91
+ *	@(#)cd9660_lookup.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_lookup.c,v 1.43 2007/02/11 13:54:25 rodrigc Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <fs/cd9660/iso.h>
+#include <fs/cd9660/cd9660_node.h>
+#include <fs/cd9660/iso_rrip.h>
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the filesystem is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".".  When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and iput
+ * instead of two iputs.
+ *
+ * Overall outline of ufs_lookup:
+ *
+ *	search for name in directory, to found or notfound
+ * notfound:
+ *	if creating, return locked directory, leaving info on available slots
+ *	else return error
+ * found:
+ *	if at end of path and deleting, return information to allow delete
+ *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ *	  inode and return info to allow rewrite
+ *	if not at end, add name to cache; if at end and neither creating
+ *	  nor deleting, add name to cache
+ *
+ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
+ */
+int
+cd9660_lookup(ap)
+	struct vop_cachedlookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *vdp;		/* vnode for directory being searched */
+	struct iso_node *dp;		/* inode for directory being searched */
+	struct iso_mnt *imp;		/* filesystem that directory is in */
+	struct buf *bp;			/* a buffer of directory entries */
+	struct iso_directory_record *ep = 0;/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	int saveoffset = 0;		/* offset of last directory entry in dir */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	struct vnode *pdp;		/* saved dp during symlink work */
+	struct vnode *tdp;		/* returned by cd9660_vget_internal */
+	u_long bmask;			/* block offset mask */
+	int error;
+	ino_t ino = 0, saved_ino;	/* ino: non-zero once a name match has been recorded */
+	int reclen;			/* length byte of the current record */
+	u_short namelen;
+	int isoflags;			/* flags byte of the current record */
+	char altname[NAME_MAX];		/* name returned by cd9660_rrip_getname() */
+	int res;
+	int assoc, len;
+	char *name;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+	struct thread *td = cnp->cn_thread;
+
+	bp = NULL;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 */
+
+	len = cnp->cn_namelen;
+	name = cnp->cn_nameptr;
+	/*
+	 * A leading `=' means, we are looking for an associated file
+	 */
+	if ((assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)))
+	{
+		len--;	/* compare without the marker character */
+		name++;
+	}
+
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	bmask = imp->im_bmask;
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;	/* single pass from the start */
+	} else {
+		dp->i_offset = dp->i_diroff;
+		if ((entryoffsetinblock = dp->i_offset & bmask) &&
+		    (error = cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)))
+				return (error);
+		numdirpasses = 2;	/* cached offset to the end, then start to cached offset */
+		nchstats.ncs_2passes++;
+	}
+	endsearch = dp->i_size;
+
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if ((dp->i_offset & bmask) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error =
+			    cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			((char *)bp->b_data + entryoffsetinblock);
+
+		reclen = isonum_711(ep->length);
+		if (reclen == 0) {
+			/* skip to next block, if any */
+			dp->i_offset =
+			    (dp->i_offset & ~bmask) + imp->logical_block_size;
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+			/* illegal entry, stop */
+			break;
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size)
+			/* entries are not allowed to cross boundaries */
+			break;
+
+		namelen = isonum_711(ep->name_len);
+		isoflags = isonum_711(imp->iso_ftype == ISO_FTYPE_HIGH_SIERRA?
+				      &ep->date[6]: ep->flags);	/* High Sierra: the flags byte is read from date[6] */
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+			/* illegal entry, stop */
+			break;
+
+		/*
+		 * Check for a name match.
+		 */
+		switch (imp->iso_ftype) {
+		default:
+			if (!(isoflags & 4) == !assoc) {	/* flag bit 4 of the entry must agree with assoc */
+				if ((len == 1
+				     && *name == '.')
+				    || (flags & ISDOTDOT)) {
+					if (namelen == 1
+					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {	/* one-byte name: 0 for ".", 1 for ".." */
+						/*
+						 * Save directory entry's inode number and
+						 * release directory buffer.
+						 */
+						dp->i_ino = isodirino(ep, imp);
+						goto found;
+					}
+					if (namelen != 1
+					    || ep->name[0] != 0)	/* anything but the "." entry ends the search for "." / ".." */
+						goto notfound;
+				} else if (!(res = isofncmp(name, len,
+							    ep->name, namelen,
+							    imp->joliet_level,
+							    imp->im_flags,
+							    imp->im_d2l,
+							    imp->im_l2d))) {
+					if (isoflags & 2)	/* flag bit 2 set: number the entry via isodirino() */
+						ino = isodirino(ep, imp);
+					else			/* otherwise: byte address of this record */
+						ino = dbtob(bp->b_blkno)
+							+ entryoffsetinblock;
+					saveoffset = dp->i_offset;	/* remember this match and keep scanning */
+				} else if (ino)	/* first non-matching entry after a match: settle on the saved one */
+					goto foundino;
+#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
+				else if (res < 0)
+					goto notfound;
+				else if (res > 0 && numdirpasses == 2)
+					numdirpasses++;
+#endif
+			}
+			break;
+		case ISO_FTYPE_RRIP:
+			if (isonum_711(ep->flags)&2)
+				ino = isodirino(ep, imp);
+			else
+				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+			dp->i_ino = ino;
+			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);	/* may replace dp->i_ino; see the "relocated directory" test below */
+			if (namelen == cnp->cn_namelen
+			    && !bcmp(name,altname,namelen))
+				goto found;
+			ino = 0;	/* no match: forget this entry's number */
+			break;
+		}
+		dp->i_offset += reclen;
+		entryoffsetinblock += reclen;
+	}
+	if (ino) {
+foundino:
+		dp->i_ino = ino;
+		if (saveoffset != dp->i_offset) {	/* scanning moved past the match: go back to it */
+			if (lblkno(imp, dp->i_offset) !=
+			    lblkno(imp, saveoffset)) {	/* the match lies in a different block: re-read it */
+				if (bp != NULL)
+					brelse(bp);
+				if ((error = cd9660_blkatoff(vdp,
+				    (off_t)saveoffset, NULL, &bp)) != 0)
+					return (error);
+			}
+			entryoffsetinblock = saveoffset & bmask;
+			ep = (struct iso_directory_record *)
+				((char *)bp->b_data + entryoffsetinblock);
+			dp->i_offset = saveoffset;
+		}
+		goto found;
+	}
+notfound:
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;	/* second pass stops where the first one began */
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);	/* *vpp is still NULL here */
+	if (nameiop == CREATE || nameiop == RENAME)
+		return (EROFS);
+	return (ENOENT);
+
+found:
+	if (numdirpasses == 2)
+		nchstats.ncs_pass2++;
+
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset;
+
+	/*
+	 * Step through the translation in the name.  We do not `iput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to ensure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the `iget' for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the filesystem has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = vdp;
+	/*
+	 * If ino is different from dp->i_ino,
+	 * it's a relocated directory.
+	 */
+	if (flags & ISDOTDOT) {
+		saved_ino = dp->i_ino;	/* copied before the directory is unlocked */
+		VOP_UNLOCK(pdp, 0, td);	/* race to get the inode */
+		error = cd9660_vget_internal(vdp->v_mount, saved_ino,
+					     LK_EXCLUSIVE, &tdp,
+					     saved_ino != ino, ep);
+		brelse(bp);	/* ep pointed into bp, so it is released only now */
+		vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td);
+		if (error)
+			return (error);
+		*vpp = tdp;
+	} else if (dp->i_number == dp->i_ino) {
+		brelse(bp);
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		error = cd9660_vget_internal(vdp->v_mount, dp->i_ino,
+					     LK_EXCLUSIVE, &tdp,
+					     dp->i_ino != ino, ep);
+		brelse(bp);	/* ep pointed into bp, so it is released only now */
+		if (error)
+			return (error);
+		*vpp = tdp;
+	}
+
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * Read the logical block of vnode "vp" that contains byte "offset" and hand
+ * the buffer back through "bpp".  When "res" is non-NULL it is pointed at the
+ * byte for "offset" inside that buffer.  Returns 0 or the bread() error.
+ */
+int
+cd9660_blkatoff(vp, offset, res, bpp)
+	struct vnode *vp;
+	off_t offset;
+	char **res;
+	struct buf **bpp;
+{
+	struct iso_node *ip = VTOI(vp);
+	struct iso_mnt *imp = ip->i_mnt;
+	daddr_t lbn = lblkno(imp, offset);
+	struct buf *bp;
+	int error;
+
+	error = bread(vp, lbn, blksize(imp, ip, lbn), NOCRED, &bp);
+	if (error != 0) {
+		/* Release the buffer even though the read failed. */
+		brelse(bp);
+		*bpp = NULL;
+		return (error);
+	}
+
+	/*
+	 * The directory code derives the inode number of some directory
+	 * entries from b_blkno, so the buffer has to be BMAPped here.
+	 * Before directories were VMIO-backed this could be skipped,
+	 * because buffers were freed atomically with the invalidation
+	 * of their data.  A VMIO-backed buffer can be freed and later
+	 * reconstituted, and the reconstituted buffer knows nothing
+	 * about b_blkno.
+	 *
+	 * An unmapped buffer is recognised by b_blkno == b_lblkno.
+	 */
+	if (bp->b_blkno == bp->b_lblkno)
+		bp->b_blkno = (ip->iso_start + bp->b_lblkno) <<
+		    (imp->im_bshift - DEV_BSHIFT);
+
+	if (res != NULL) {
+		/* address of "offset" within the block just read */
+		*res = (char *)bp->b_data + blkoff(imp, offset);
+	}
+	*bpp = bp;
+	return (0);
+}
--- /dev/null
+++ sys/fs/cd9660/cd9660_rrip.c
@@ -0,0 +1,715 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.c	8.6 (Berkeley) 12/5/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_rrip.c,v 1.30 2007/02/11 13:54:25 rodrigc Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <fs/cd9660/iso.h>
+#include <fs/cd9660/cd9660_node.h>
+#include <fs/cd9660/cd9660_rrip.h>
+#include <fs/cd9660/iso_rrip.h>
+
+typedef int	rrt_func_t(void *, ISO_RRIP_ANALYZE *ana);	/* per-entry handler */
+
+typedef struct {
+	char type[2];		/* two-character entry signature, e.g. "PX" */
+	rrt_func_t *func;	/* handler for a matching entry; NULL ends the table */
+	void (*func2)(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana);
+	int result;		/* ISO_SUSP_* bits; func2 runs if none were returned */
+} RRIP_TABLE;
+
+static int	cd9660_rrip_altname(ISO_RRIP_ALTNAME *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_attr(ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_cont(ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana);
+static void	cd9660_rrip_defattr(struct iso_directory_record *isodir,
+		    ISO_RRIP_ANALYZE *ana);
+static void	cd9660_rrip_defname(struct iso_directory_record *isodir,
+		    ISO_RRIP_ANALYZE *ana);
+static void	cd9660_rrip_deftstamp(struct iso_directory_record *isodir,
+		    ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_device(ISO_RRIP_DEVICE *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_extref(ISO_RRIP_EXTREF *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_idflag(ISO_RRIP_IDFLAG *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_loop(struct iso_directory_record *isodir,
+		    ISO_RRIP_ANALYZE *ana, RRIP_TABLE *table);
+static int	cd9660_rrip_pclink(ISO_RRIP_CLINK *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_reldir(ISO_RRIP_RELDIR *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_slink(ISO_RRIP_SLINK *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_stop(ISO_SUSP_HEADER *p, ISO_RRIP_ANALYZE *ana);
+static int	cd9660_rrip_tstamp(ISO_RRIP_TSTAMP *p, ISO_RRIP_ANALYZE *ana);
+
+/*
+ * POSIX file attributes (PX): store mode, uid, gid and link count in the inode.
+ */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;		/* the PX entry */
+	ISO_RRIP_ANALYZE *ana;		/* ana->inop receives the attributes */
+{
+	ana->inop->inode.iso_mode = isonum_733(p->mode);
+	ana->inop->inode.iso_uid = isonum_733(p->uid);
+	ana->inop->inode.iso_gid = isonum_733(p->gid);
+	ana->inop->inode.iso_links = isonum_733(p->links);
+	ana->fields &= ~ISO_SUSP_ATTR;	/* attributes are no longer wanted */
+	return ISO_SUSP_ATTR;		/* tell the caller PX was handled */
+}
+
+static void
+cd9660_rrip_defattr(isodir,ana)		/* default used when no PX was handled */
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* PX is a required field, so complain before using cd9660_defattr() */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir,ana->inop,NULL,ISO_FTYPE_RRIP);
+}
+
+/*
+ * Symbolic link (SL): append the path components of this entry to ana->outbuf.
+ */
+static int
+cd9660_rrip_slink(p,ana)
+	ISO_RRIP_SLINK	*p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_SLINK_COMPONENT *pcomp;
+	ISO_RRIP_SLINK_COMPONENT *pcompe;
+	int len, wlen, cont;
+	char *outbuf, *inbuf;
+
+	pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+	pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length));
+	len = *ana->outlen;	/* work on local copies; stored back on success */
+	outbuf = ana->outbuf;
+	cont = ana->cont;
+
+	/*
+	 * Walk the components of this entry, appending each one to the path
+	 */
+	for (;
+	     pcomp < pcompe;
+	     pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+						  + isonum_711(pcomp->clen))) {
+
+		if (!cont) {	/* a new component starts: separate it with '/' */
+			if (len < ana->maxlen) {
+				len++;
+				*outbuf++ = '/';
+			}
+		}
+		cont = 0;
+
+		inbuf = "..";	/* "." and ".." are both copied from this literal */
+		wlen = 0;
+
+		switch (*pcomp->cflag) {
+
+		case ISO_SUSP_CFLAG_CURRENT:
+			/* Inserting Current */
+			wlen = 1;
+			break;
+
+		case ISO_SUSP_CFLAG_PARENT:
+			/* Inserting Parent */
+			wlen = 2;
+			break;
+
+		case ISO_SUSP_CFLAG_ROOT:
+			/* Inserting slash for ROOT */
+			/* Double slash, nothing really to do here. */
+			break;
+
+		case ISO_SUSP_CFLAG_VOLROOT:
+			/* Inserting a mount point i.e. "/cdrom" */
+			/* discard what was gathered so far and restart there */
+			outbuf -= len;
+			len = 0;
+			inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+			wlen = strlen(inbuf);
+			break;
+
+		case ISO_SUSP_CFLAG_HOST:
+			/* Inserting hostname i.e. "kurt.tools.de" */
+			inbuf = hostname;
+			wlen = strlen(hostname);
+			break;
+
+		case ISO_SUSP_CFLAG_CONTINUE:
+			cont = 1;	/* next component continues this one: no '/' */
+			/* FALLTHROUGH */
+		case 0:
+			/* Inserting component */
+			wlen = isonum_711(pcomp->clen);
+			inbuf = pcomp->name;
+			break;
+		default:
+			printf("RRIP with incorrect flags?");
+			wlen = ana->maxlen + 1;	/* forces the overflow path below */
+			break;
+		}
+
+		if (len + wlen > ana->maxlen) {
+			/* indicate error to caller */
+			ana->cont = 1;
+			ana->fields = 0;	/* nothing more is wanted */
+			ana->outbuf -= *ana->outlen;	/* rewind the stored output */
+			*ana->outlen = 0;
+			return 0;
+		}
+
+		bcopy(inbuf,outbuf,wlen);
+		outbuf += wlen;
+		len += wlen;
+
+	}
+	ana->outbuf = outbuf;
+	*ana->outlen = len;
+	ana->cont = cont;
+
+	if (!isonum_711(p->flags)) {	/* zero flags: the link is complete */
+		ana->fields &= ~ISO_SUSP_SLINK;
+		return ISO_SUSP_SLINK;
+	}
+	return 0;	/* more SL data is expected in a later entry */
+}
+
+/*
+ * Alternate name (NM): append this entry's name text to ana->outbuf.
+ */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;		/* the NM entry */
+	ISO_RRIP_ANALYZE *ana;		/* outbuf/outlen receive the name */
+{
+	char *inbuf;
+	int wlen, cont;
+
+	inbuf = "..";			/* source of "." and ".." */
+	wlen = 0;
+	cont = 0;
+
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		/* Inserting Current */
+		wlen = 1;
+		break;
+
+	case ISO_SUSP_CFLAG_PARENT:
+		/* Inserting Parent */
+		wlen = 2;
+		break;
+
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = strlen(hostname);
+		break;
+
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;		/* the name goes on in a later NM entry */
+		/* FALLTHROUGH */
+	case 0:
+		/* Inserting component: the text follows the 5 leading bytes */
+		wlen = isonum_711(p->h.length) - 5;
+		inbuf = (char *)p + 5;
+		if (wlen < 0)		/* entry too short: never bcopy() < 0 */
+			wlen = ana->maxlen + 1;	/* rejected just below */
+		break;
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		wlen = ana->maxlen + 1;	/* forces the rejection below */
+		break;
+	}
+
+	if ((*ana->outlen += wlen) > ana->maxlen) {
+		/* treat as no name field */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		ana->outbuf -= *ana->outlen - wlen;	/* rewind to the start */
+		*ana->outlen = 0;
+		return 0;
+	}
+
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+
+	if (!cont) {			/* name complete */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+static void
+cd9660_rrip_defname(isodir,ana)		/* default name: use the ISO 9660 name */
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	isofntrans(isodir->name,isonum_711(isodir->name_len),
+		   ana->outbuf,ana->outlen,
+		   1,isonum_711(isodir->flags)&4, ana->imp->joliet_level,
+		   ana->imp->im_flags, ana->imp->im_d2l);
+	switch (*ana->outbuf) {	/* first byte 0 or 1 is replaced by "." / ".." */
+	default:
+		break;
+	case 1:
+		*ana->outlen = 2;	/* ".." */
+		/* FALLTHROUGH */
+	case 0:
+		/* outlen is 1 already */
+		strcpy(ana->outbuf,"..");	/* outlen selects "." or ".." */
+		break;
+	}
+}
+
+/*
+ * Parent or Child Link (PL/CL): report the linked directory via *ana->inump.
+ */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK	*p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+	return *p->h.type == 'C' ? ISO_SUSP_CLINK : ISO_SUSP_PLINK;
+}
+
+/*
+ * Relocated directory (RE): drop any gathered name and end the field search.
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR	 *p;		/* unused */
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	*ana->outlen = 0;
+	ana->fields = 0;		/* nothing else is wanted any more */
+	return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+}
+
+static int
+cd9660_rrip_tstamp(p,ana)		/* TF entry: fill in mtime, atime, ctime */
+	ISO_RRIP_TSTAMP *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	u_char *ptime;			/* next unread stamp inside the entry */
+
+	ptime = p->time;
+
+	/* Check a format of time stamp (7bytes/17bytes) */
+	if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) {
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 7;	/* creation time is present but not used */
+
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime,
+					    ISO_FTYPE_RRIP);
+			ptime += 7;
+		} else
+			bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec));
+
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime,
+					    ISO_FTYPE_RRIP);
+			ptime += 7;
+		} else	/* no access time recorded: reuse mtime */
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime,
+					    ISO_FTYPE_RRIP);
+		else	/* no attribute-change time recorded: reuse mtime */
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+
+	} else {	/* same sequence with 17-byte stamps */
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 17;
+
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime);
+			ptime += 17;
+		} else
+			bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec));
+
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime);
+			ptime += 17;
+		} else
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime);
+		else
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+
+	}
+	ana->fields &= ~ISO_SUSP_TSTAMP;	/* time stamps are no longer wanted */
+	return ISO_SUSP_TSTAMP;
+}
+
+static void
+cd9660_rrip_deftstamp(isodir,ana)	/* default used when no TF was handled */
+	struct iso_directory_record  *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	cd9660_deftstamp(isodir,ana->inop,NULL,ISO_FTYPE_RRIP);
+}
+
+/*
+ * POSIX device number (PN): build the inode's rdev from the high/low words.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	u_int high, low;
+
+	high = isonum_733(p->dev_t_high);
+	low  = isonum_733(p->dev_t_low);
+
+	if (high == 0)	/* major and minor are both taken from the low word */
+		ana->inop->inode.iso_rdev = makedev(umajor(low), uminor(low));
+	else		/* high word is the major, low word gives the minor */
+		ana->inop->inode.iso_rdev = makedev(high, uminor(low));
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return ISO_SUSP_DEVICE;
+}
+
+/*
+ * RR entry: mask the low eight wanted-field bits with the entry's flag byte.
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */
+	/* special handling of RE field */
+	if (ana->fields&ISO_SUSP_RELDIR)
+		return cd9660_rrip_reldir(/* XXX */ (ISO_RRIP_RELDIR *)p,ana);
+
+	return ISO_SUSP_IDFLAG;
+}
+
+/*
+ * Continuation pointer (CE): record block, offset and length of the next area.
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_blk = isonum_733(p->location);
+	ana->iso_ce_off = isonum_733(p->offset);
+	ana->iso_ce_len = isonum_733(p->length);	/* non-zero: area is pending */
+	return ISO_SUSP_CONT;
+}
+
+/*
+ * System Use end (ST): ask the scanning loop to stop reading this area.
+ */
+static int
+cd9660_rrip_stop(p,ana)
+	ISO_SUSP_HEADER *p;		/* unused */
+	ISO_RRIP_ANALYZE *ana;		/* unused */
+{
+	return ISO_SUSP_STOP;
+}
+
+/*
+ * Extension reference (ER): accept only identifier "RRIP_1991A", version 1.
+ */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	if (isonum_711(p->len_id) != 10
+	    || bcmp((char *)p + 8,"RRIP_1991A",10)
+	    || isonum_711(p->version) != 1)
+		return 0;	/* some other extension: keep looking */
+	ana->fields &= ~ISO_SUSP_EXTREF;
+	return ISO_SUSP_EXTREF;
+}
+
+/*
+ * Walk the SUSP entries of "isodir", following CE continuation areas, and
+ * call the "table" handler whose two-byte signature matches each entry.  Then
+ * run the func2 defaults of the leading table rows whose result bits were
+ * not collected.  Returns the OR of the handlers' ISO_SUSP_* return values.
+ */
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+	RRIP_TABLE *table;
+{
+	RRIP_TABLE *ptable;
+	ISO_SUSP_HEADER *phead;
+	ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	u_short c;
+	int result;
+
+	/* Note: if the name length is even, one padding byte follows the name */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+	isochar(isodir->name, pwhead, ana->imp->joliet_level, &c, NULL,
+		ana->imp->im_flags, ana->imp->im_d2l);
+
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (c != 0 || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/* Note: "pend" should be more than one SUSP header */
+		while (pend >= phead + 1) {
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)	/* nothing left to look for */
+					break;
+			}
+			if (result&ISO_SUSP_STOP) {
+				result &= ~ISO_SUSP_STOP;
+				break;
+			}
+			/* plausibility check */
+			if (isonum_711(phead->length) < sizeof(*phead))
+				break;
+			/* move to next SUSP; hopefully works with newer versions, too */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length));
+		}
+
+		if (ana->fields && ana->iso_ce_len) {
+			if (ana->iso_ce_blk >= ana->imp->volume_space_size
+			    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size)
+				break;
+			/* Don't leak the buffer of the previous continuation area. */
+			if (bp != NULL)
+				brelse(bp);
+			bp = NULL;
+			if (bread(ana->imp->im_devvp,
+				  ana->iso_ce_blk <<
+				  (ana->imp->im_bshift - DEV_BSHIFT),
+				  ana->imp->logical_block_size, NOCRED, &bp))
+				/* what to do now? */
+				break;
+			phead = (ISO_SUSP_HEADER *)(bp->b_data + ana->iso_ce_off);
+			pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+		} else
+			break;
+	}
+	if (bp)
+		brelse(bp);
+	/* For each basic field (attribute/time stamp) not found, set defaults */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+
+	return result;
+}
+
+/*
+ * Get Attributes: handler table used by cd9660_rrip_analyze().
+ */
+/*
+ * XXX the casts are bogus but will do for now.
+ */
+#define	BC	(rrt_func_t *)
+static RRIP_TABLE rrip_table_analyze[] = {
+	{ "PX", BC cd9660_rrip_attr,	cd9660_rrip_defattr,	ISO_SUSP_ATTR },
+	{ "TF", BC cd9660_rrip_tstamp,	cd9660_rrip_deftstamp,	ISO_SUSP_TSTAMP },
+	{ "PN", BC cd9660_rrip_device,	0,			ISO_SUSP_DEVICE },
+	{ "RR", BC cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }	/* terminator */
+};
+
+int
+cd9660_rrip_analyze(isodir,inop,imp)	/* fill "inop" from PX, TF and PN */
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+
+	analyze.inop = inop;
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+
+	return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze);
+}
+
+/*
+ * Get Alternate Name: handler table used by cd9660_rrip_getname().
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+	{ "NM", BC cd9660_rrip_altname,	cd9660_rrip_defname,	ISO_SUSP_ALTNAME },
+	{ "CL", BC cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "PL", BC cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "RE", BC cd9660_rrip_reldir,	0,			ISO_SUSP_RELDIR },
+	{ "RR", BC cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }	/* terminator */
+};
+
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;			/* receives the name */
+	u_short *outlen;		/* receives the name length */
+	ino_t *inump;			/* set when a CL or PL entry is handled */
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	RRIP_TABLE *tab;
+	u_short c;
+
+	analyze.outbuf = outbuf;
+	analyze.outlen = outlen;
+	analyze.maxlen = NAME_MAX;
+	analyze.inump = inump;
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+	*outlen = 0;
+
+	isochar(isodir->name, isodir->name + isonum_711(isodir->name_len),
+		imp->joliet_level, &c, NULL, imp->im_flags, imp->im_d2l);
+	tab = rrip_table_getname;
+	if (c == 0 || c == 1) {	/* first name character is 0 or 1 */
+		cd9660_rrip_defname(isodir,&analyze);
+
+		analyze.fields &= ~ISO_SUSP_ALTNAME;
+		tab++;		/* skip the NM row: the name is already set */
+	}
+
+	return cd9660_rrip_loop(isodir,&analyze,tab);
+}
+
+/*
+ * Get Symbolic Link: handler table used by cd9660_rrip_getsymname().
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+	{ "SL", BC cd9660_rrip_slink,	0,			ISO_SUSP_SLINK },
+	{ "RR", BC cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }	/* terminator */
+};
+
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;			/* receives the link target */
+	u_short *outlen;		/* receives the target length */
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+
+	analyze.outbuf = outbuf;
+	analyze.outlen = outlen;
+	*outlen = 0;
+	analyze.maxlen = MAXPATHLEN;
+	analyze.cont = 1;		/* don't start with a slash */
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_SLINK;
+
+	return (cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname)&ISO_SUSP_SLINK);	/* non-zero only for a complete link */
+}
+
+static RRIP_TABLE rrip_table_extref[] = {	/* used by cd9660_rrip_offset() */
+	{ "ER", BC cd9660_rrip_extref,	0,			ISO_SUSP_EXTREF },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }	/* terminator */
+};
+
+/*
+ * Check for Rock Ridge Extension and return offset of its fields (-1 if none).
+ * Note: We insist on the ER field.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_OFFSET *p;
+	ISO_RRIP_ANALYZE analyze;
+
+	imp->rr_skip0 = 0;
+	p = (ISO_RRIP_OFFSET *)(isodir->name + 1);
+	if (bcmp(p,"SP\7\1\276\357",6)) {	/* no SP entry right after the name */
+		/* Maybe, it's a CDROM XA disc? */
+		imp->rr_skip0 = 15;
+		p = (ISO_RRIP_OFFSET *)((char *)p + 15);	/* retry 15 bytes on */
+		if (bcmp(p,"SP\7\1\276\357",6))
+			return -1;
+	}
+
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_EXTREF;
+	if (!(cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF))
+		return -1;	/* no acceptable ER entry was found */
+
+	return isonum_711(p->skip);
+}
--- /dev/null
+++ sys/fs/cd9660/iso_rrip.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_rrip.h	8.2 (Berkeley) 1/23/94
+ * $FreeBSD: src/sys/fs/cd9660/iso_rrip.h,v 1.9 2007/02/11 13:54:25 rodrigc Exp $
+ */
+
+
+/*
+ *	Analyze function flag (similar to RR field bits)
+ */
+#define	ISO_SUSP_ATTR		0x0001
+#define	ISO_SUSP_DEVICE		0x0002
+#define	ISO_SUSP_SLINK		0x0004
+#define	ISO_SUSP_ALTNAME	0x0008
+#define	ISO_SUSP_CLINK		0x0010
+#define	ISO_SUSP_PLINK		0x0020
+#define	ISO_SUSP_RELDIR		0x0040
+#define	ISO_SUSP_TSTAMP		0x0080
+#define	ISO_SUSP_IDFLAG		0x0100
+#define	ISO_SUSP_EXTREF		0x0200
+#define	ISO_SUSP_CONT		0x0400
+#define	ISO_SUSP_OFFSET		0x0800
+#define	ISO_SUSP_STOP		0x1000
+#define	ISO_SUSP_UNKNOWN	0x8000
+
+typedef struct {
+	struct iso_node	*inop;		/* inode that receives attributes */
+	int		fields;		/* interesting fields in this analysis */
+	daddr_t		iso_ce_blk;	/* block of continuation area */
+	off_t		iso_ce_off;	/* offset of continuation area */
+	int		iso_ce_len;	/* length of continuation area */
+	struct iso_mnt	*imp;		/* mount structure */
+	ino_t		*inump;		/* inode number pointer */
+	char		*outbuf;	/* name/symbolic link output area */
+	u_short		*outlen;	/* length of above */
+	u_short		maxlen;		/* maximum length of above */
+	int		cont;		/* continuation of above */
+} ISO_RRIP_ANALYZE;
+
+struct iso_directory_record;
+
+int cd9660_rrip_analyze(struct iso_directory_record *isodir,
+			    struct iso_node *inop, struct iso_mnt *imp);
+int cd9660_rrip_getname(struct iso_directory_record *isodir,
+			    char *outbuf, u_short *outlen,
+			    ino_t *inump, struct iso_mnt *imp);
+int cd9660_rrip_getsymname(struct iso_directory_record *isodir,
+			       char *outbuf, u_short *outlen,
+			       struct iso_mnt *imp);
+int cd9660_rrip_offset(struct iso_directory_record *isodir,
+			   struct iso_mnt *imp);
--- /dev/null
+++ sys/fs/cd9660/TODO
@@ -0,0 +1,43 @@
+# $FreeBSD: src/sys/fs/cd9660/TODO,v 1.8 2007/02/11 13:54:25 rodrigc Exp $
+
+ 2) should understand Rock Ridge
+
+   Yes, the following features are supported:
+
+       o Symbolic Link
+       o Real Name(long name)
+       o File Attribute 
+       o Time stamp
+       o uid, gid
+       o Devices
+       o Relocated directories
+
+   Except for the following:
+
+       o POSIX device number mapping
+
+         There is some preliminary stuff in there that (ab-)uses the mknod
+         system call, but this needs a writable filesystem
+         
+ 5) should have name translation enabled by mount flag
+
+   Yes. The Rock Ridge Extension can be disabled with the following option:
+
+      "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+    are slow)
+
+   Not yet.
+
+ 7) ECMA support.
+
+   Not yet. We need not only a technical spec but also an ECMA-format
+   CD-ROM itself!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+   Not yet. The rest of the system would also have to be made 8-bit
+   clean. As far as I know, if you export the CD-ROM via NFS, an 8-bit-clean
+   client (e.g. Japanese Solaris using EUC) can access it.
+
--- /dev/null
+++ sys/fs/cd9660/cd9660_iconv.c
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2003 Ryuichiro Imura
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_iconv.c,v 1.2 2007/02/11 13:54:25 rodrigc Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/mount.h>
+#include <sys/iconv.h>
+
+VFS_DECLARE_ICONV(cd9660);
--- /dev/null
+++ sys/fs/cd9660/cd9660_bmap.c
@@ -0,0 +1,104 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_bmap.c	8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/cd9660/cd9660_bmap.c,v 1.16 2007/02/11 13:54:25 rodrigc Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <fs/cd9660/iso.h>
+#include <fs/cd9660/cd9660_node.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk. The conversion is done by using the logical block
+ * number to index into the data block (extent) for the file.
+ */
+int
+cd9660_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct bufobj **a_bop;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct iso_node *ip = VTOI(ap->a_vp);
+	daddr_t lblkno = ap->a_bn;
+	int bshift;
+
+	/*
+	 * Check for underlying vnode requests and ensure that logical
+	 * to physical mapping is requested.
+	 */
+	if (ap->a_bop != NULL)
+		*ap->a_bop = &ip->i_mnt->im_devvp->v_bufobj;
+	if (ap->a_bnp == NULL)
+		return (0);	/* caller did not ask for a block number */
+
+	/*
+	 * Compute the requested block number
+	 */
+	bshift = ip->i_mnt->im_bshift;
+	*ap->a_bnp = (ip->iso_start + lblkno) << (bshift - DEV_BSHIFT);
+
+	/*
+	 * Determine maximum number of readahead blocks following the
+	 * requested block.
+	 */
+	if (ap->a_runp) {
+		int nblk;
+
+		nblk = (ip->i_size >> bshift) - (lblkno + 1);
+		if (nblk <= 0)
+			*ap->a_runp = 0;
+		else if (nblk >= (MAXBSIZE >> bshift))	/* cap at MAXBSIZE */
+			*ap->a_runp = (MAXBSIZE >> bshift) - 1;
+		else
+			*ap->a_runp = nblk;
+	}
+
+	if (ap->a_runb) {
+		*ap->a_runb = 0;	/* no read-behind run is reported */
+	}
+
+	return 0;
+}
--- /dev/null
+++ sys/fs/cd9660/cd9660_mount.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_mount.h	8.1 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/fs/cd9660/cd9660_mount.h,v 1.9 2007/02/11 13:54:25 rodrigc Exp $
+ */
+
+/*
+ * Arguments to mount ISO 9660 filesystems.
+ */
+struct iso_args {
+	char	*fspec;			/* block special device to mount */
+	struct	export_args export;	/* network export info */
+	int	flags;			/* mounting flags, ISOFSMNT_* below */
+	int	ssector;		/* starting sector, 0 for 1st session */
+	char	*cs_disk;		/* disk charset for Joliet cs conversion */
+	char	*cs_local;		/* local charset for Joliet cs conversion */
+};
+#define	ISOFSMNT_NORRIP	0x00000001	/* disable Rock Ridge Ext. */
+#define	ISOFSMNT_GENS	0x00000002	/* enable generation numbers */
+#define	ISOFSMNT_EXTATT	0x00000004	/* enable extended attributes */
+#define ISOFSMNT_NOJOLIET 0x00000008	/* disable Joliet Ext. */
+#define ISOFSMNT_BROKENJOLIET 0x00000010/* allow broken Joliet disks */
+#define	ISOFSMNT_KICONV 0x00000020	/* Use libiconv to convert chars */
--- /dev/null
+++ sys/fs/cd9660/cd9660_node.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace at blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai at spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.h	8.6 (Berkeley) 5/14/95
+ * $FreeBSD: src/sys/fs/cd9660/cd9660_node.h,v 1.33 2007/02/11 13:54:25 rodrigc Exp $
+ */
+
+/*
+ * Theoretically, directories can be more than 2Gb in length,
+ * however, in practice this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ */
+#define doff_t	long
+
+typedef	struct	{
+	struct timespec	iso_atime;	/* time of last access */
+	struct timespec	iso_mtime;	/* time of last modification */
+	struct timespec	iso_ctime;	/* time file changed */
+	u_short		iso_mode;	/* files access mode and type */
+	uid_t		iso_uid;	/* owner user id */
+	gid_t		iso_gid;	/* owner group id */
+	short		iso_links;	/* links of file */
+	dev_t		iso_rdev;	/* Major/Minor number for special */
+} ISO_RRIP_INODE;
+
+
+struct iso_node {
+	struct	vnode *i_vnode;	/* vnode associated with this inode */
+	u_long	i_flag;		/* see below */
+	ino_t	i_number;	/* the identity of the inode */
+				/* we use the actual starting block of the file */
+	struct	iso_mnt *i_mnt;	/* filesystem associated with this inode */
+	struct	lockf *i_lockf;	/* head of byte-level lock list */
+	doff_t	i_endoff;	/* end of useful stuff in directory */
+	doff_t	i_diroff;	/* offset in dir, where we found last entry */
+	doff_t	i_offset;	/* offset of free space in directory */
+	ino_t	i_ino;		/* inode number of found directory */
+
+	long iso_extent;	/* extent of file */
+	unsigned long i_size;
+	long iso_start;		/* actual start of data of file (may be different */
+				/* from iso_extent, if file has extended attributes) */
+	ISO_RRIP_INODE	inode;
+};
+
+#define	i_forw		i_chain[0]	/* NOTE(review): struct iso_node above has */
+#define	i_back		i_chain[1]	/* no i_chain member -- stale? confirm */
+
+/* flags */
+#define	IN_ACCESS	0x0020		/* inode access time to be updated */
+
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+#ifdef _KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_ISOFSMNT);
+MALLOC_DECLARE(M_ISOFSNODE);
+#endif
+
+struct buf;
+struct vop_bmap_args;
+struct vop_cachedlookup_args;
+struct vop_inactive_args;
+struct vop_reclaim_args;
+
+/*
+ * Prototypes for ISOFS vnode operations
+ */
+int cd9660_lookup(struct vop_cachedlookup_args *);
+int cd9660_inactive(struct vop_inactive_args *);
+int cd9660_reclaim(struct vop_reclaim_args *);
+int cd9660_bmap(struct vop_bmap_args *);
+int cd9660_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp);
+
+void cd9660_defattr(struct iso_directory_record *,
+			struct iso_node *, struct buf *, enum ISO_FTYPE);
+void cd9660_deftstamp(struct iso_directory_record *,
+			struct iso_node *, struct buf *, enum ISO_FTYPE);
+int cd9660_tstamp_conv7(u_char *, struct timespec *, enum ISO_FTYPE);
+int cd9660_tstamp_conv17(u_char *, struct timespec *);
+
+#endif /* _KERNEL */
--- /dev/null
+++ sys/fs/cd9660/TODO.hibler
@@ -0,0 +1,16 @@
+$FreeBSD: src/sys/fs/cd9660/TODO.hibler,v 1.4 2007/02/11 13:54:25 rodrigc Exp $
+
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+   Since it was modelled after the inode code, we might be able to merge
+   them back.  It looks like a separate (but very similar) lookup routine
+   will be needed due to the associated file stuff.
+
+2. It would be nice to be able to use the vfs_cluster code.
+   Unfortunately, if the logical block size is smaller than the page size,
+   it won't work.  Also, if throughput is relatively constant for any
+   block size (as it is for the HP drive--150kbs) then clustering may not
+   buy much (or may even hurt when vfs_cluster comes up with a large sync
+   cluster).
+
+3. Seems like there should be a "notrans" or some such mount option to show
+   filenames as they really are without lower-casing.  Does this make sense?
--- /dev/null
+++ sys/fs/coda/cnode.h
@@ -0,0 +1,201 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/cnode.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/cnode.h,v 1.21.4.1 2008/01/23 12:09:43 rwatson Exp $
+ * 
+ */
+
+/*-
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+#ifndef	_CNODE_H_
+#define	_CNODE_H_
+
+#include <sys/vnode.h>
+#include <sys/lock.h>
+
+MALLOC_DECLARE(M_CODA);
+
+/*
+ * tmp below since we need struct queue
+ */
+#include <fs/coda/coda_kernel.h>
+
+/*
+ * Cnode lookup stuff.
+ * NOTE: CODA_CACHESIZE must be a power of 2 for cfshash to work!
+ */
+#define CODA_CACHESIZE 512
+/* Allocate `size' bytes from M_CODA into `ptr'; CODA_FREE ignores `size'. */
+#define CODA_ALLOC(ptr, cast, size)                                        \
+do {                                                                      \
+    (ptr) = (cast)malloc((unsigned long)(size), M_CODA, M_WAITOK);         \
+    if ((ptr) == 0) {                                                     \
+	panic("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__);  \
+    }                                                                     \
+} while (0)
+
+#define CODA_FREE(ptr, size)  free((ptr), M_CODA)
+
+/*
+ * Used to select debugging statements throughout the cfs code.
+ */
+extern int codadebug;
+extern int coda_printf_delay;
+extern int coda_vnop_print_entry;
+extern int coda_psdev_print_entry;
+extern int coda_vfsop_print_entry;
+/* CODADEBUG(N, STMT) runs STMT only when bit N of codadebug is set. */
+#define CODADBGMSK(N)            (1 << (N))
+#define CODADEBUG(N, STMT)       { if (codadebug & CODADBGMSK(N)) { STMT } }
+#define myprintf(args)          /* args: parenthesized printf() list */ \
+do {                            \
+    if (coda_printf_delay)       \
+	DELAY(coda_printf_delay);\
+    printf args ;               \
+} while (0)
+
+struct cnode {
+    struct vnode	*c_vnode;
+    u_short		 c_flags;	/* flags (see below) */
+    CodaFid		 c_fid;		/* file handle */
+    struct vnode	*c_ovp;		/* open vnode pointer */
+    u_short		 c_ocount;	/* count of openers */
+    u_short		 c_owrite;	/* count of open for write */
+    struct vattr	 c_vattr; 	/* attributes */
+    char		*c_symlink;	/* pointer to symbolic link */
+    u_short		 c_symlen;	/* length of symbolic link */
+    struct cnode	*c_next;	/* links if on NetBSD machine */
+};
+#define	VTOC(vp)	((struct cnode *)(vp)->v_data)
+#define	CTOV(cp)	((struct vnode *)((cp)->c_vnode))
+
+/* c_flags bits */
+#define C_VATTR		0x01	/* Validity of vattr in the cnode */
+#define C_SYMLINK	0x02	/* Validity of symlink pointer in the cnode */
+#define C_WANTED	0x08	/* Set if lock wanted */
+#define C_LOCKED	0x10	/* Set if lock held */
+#define C_UNMOUNTING	0x20	/* Set if unmounting */
+#define C_PURGING	0x40	/* Set if purging a fid */
+/* Flag tests; `cp' may be any struct cnode pointer expression. */
+#define VALID_VATTR(cp)		((cp)->c_flags & C_VATTR)
+#define VALID_SYMLINK(cp)	((cp)->c_flags & C_SYMLINK)
+#define IS_UNMOUNTING(cp)	((cp)->c_flags & C_UNMOUNTING)
+
+struct vcomm {
+	u_long		vc_seq;
+	struct selinfo	vc_selproc;
+	struct queue	vc_requests;
+	struct queue	vc_replys;
+};
+
+#define	VC_OPEN(vcp)	    ((vcp)->vc_requests.forw != NULL)
+#define MARK_VC_CLOSED(vcp) (vcp)->vc_requests.forw = NULL;
+#define MARK_VC_OPEN(vcp)    /* MT */
+
+struct coda_clstat {
+	int	ncalls;			/* client requests */
+	int	nbadcalls;		/* upcall failures */
+	int	reqs[CODA_NCALLS];	/* count of each request */
+};
+extern struct coda_clstat coda_clstat;
+
+/*
+ * CODA structure to hold mount/filesystem information
+ */
+struct coda_mntinfo {
+    struct vnode	*mi_rootvp;
+    struct mount	*mi_vfsp;
+    struct vcomm	 mi_vcomm;
+    struct cdev		*dev;
+    int                  mi_started;
+    LIST_ENTRY(coda_mntinfo) mi_list;
+};
+struct coda_mntinfo *dev2coda_mntinfo(struct cdev *dev);
+
+/*
+ * vfs (struct mount) pointer to mount info; CODA_MOUNTED tests it for non-null
+ */
+#define vftomi(vfsp)    ((struct coda_mntinfo *)((vfsp)->mnt_data))
+#define	CODA_MOUNTED(vfsp)   (vftomi((vfsp)) != (struct coda_mntinfo *)0)
+
+/*
+ * vnode pointer to mount info, reached through the vnode's v_mount
+ */
+#define vtomi(vp)       ((struct coda_mntinfo *)((vp)->v_mount->mnt_data))
+
+/*
+ * Identify the "Control" object: by vnode, or by CODA_CONTROL name in the root.
+ */
+extern struct vnode *coda_ctlvp;
+#define	IS_CTL_VP(vp)		((vp) == coda_ctlvp)
+#define	IS_CTL_NAME(vp, name, l) (((l) == CODA_CONTROLLEN) \
+				 && ((vp) == vtomi((vp))->mi_rootvp)    \
+				 && strncmp((name), CODA_CONTROL, (l)) == 0)
+
+/* 
+ * An enum to tell us whether something that will remove a reference
+ * to a cnode was a downcall or not
+ */
+enum dc_status {
+    IS_DOWNCALL = 6,
+    NOT_DOWNCALL = 7
+};
+
+/* cfs_psdev.h */
+int coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize, caddr_t buffer);
+extern int coda_kernel_version;
+
+/* cfs_subr.h */
+int  handleDownCall(int opcode, union outputArgs *out);
+void coda_unmounting(struct mount *whoIam);
+int  coda_vmflush(struct cnode *cp);
+
+/* cfs_vnodeops.h */
+struct cnode *make_coda_node(CodaFid *fid, struct mount *vfsp, short type);
+int coda_vnodeopstats_init(void);
+
+/* coda_vfsops.h */
+struct mount *devtomp(struct cdev *dev);
+
+/* sigh */
+#define CODA_RDWR ((u_long) 31)
+
+#endif	/* _CNODE_H_ */
+
--- /dev/null
+++ sys/fs/coda/coda_namecache.c
@@ -0,0 +1,758 @@
+/*-
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_namecache.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ */
+/*-
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/*
+ * This module contains the routines to implement the CODA name cache. The
+ * purpose of this cache is to reduce the cost of translating pathnames 
+ * into Vice FIDs. Each entry in the cache contains the name of the file,
+ * the vnode (FID) of the parent directory, and the cred structure of the
+ * user accessing the file.
+ *
+ * The first time a file is accessed, it is looked up by the local Venus
+ * which first ensures that the user has access to the file. In addition
+ * we are guaranteed that Venus will invalidate any name cache entries in
+ * case the user no longer should be able to access the file. For these
+ * reasons we do not need to keep access list information as well as a
+ * cred structure for each entry.
+ *
+ * The table is accessed through coda_nc_init(), coda_nc_enter(),
+ * coda_nc_lookup(), the coda_nc_zap*() routines, coda_nc_purge_user() and
+ * coda_nc_flush() (the older cnc_* names are no longer used in this file).
+ * Several other routines aid in the implementation of the hash table.
+ */
+
+/*
+ * NOTES: rvb at cs
+ * 1.	The name cache holds a reference to every vnode in it.  Hence files can not be
+ *	 closed or made inactive until they are released.
+ * 2.	coda_nc_name(cp) was added to get a name for a cnode pointer for debugging.
+ * 3.	coda_nc_find() has debug code to detect when entries are stored with different
+ *	 credentials.  We don't understand yet, if/how entries are NOT EQ but still
+ *	 EQUAL
+ * 4.	I wonder if this name cache could be replaced by the vnode name cache.
+ *	The latter has no zapping functions, so probably not.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/coda/coda_namecache.c,v 1.23 2007/07/12 21:04:57 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/ucred.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+
+#include <fs/coda/coda.h>
+#include <fs/coda/cnode.h>
+#include <fs/coda/coda_namecache.h>
+
+#ifdef	DEBUG
+#include <fs/coda/coda_vnops.h>
+#endif
+
+/* 
+ * Declaration of the name cache data structure.
+ */
+
+int 	coda_nc_use = 1;			 /* Indicate use of CODA Name Cache */
+int	coda_nc_size = CODA_NC_CACHESIZE;	 /* size of the cache */
+int	coda_nc_hashsize = CODA_NC_HASHSIZE; /* size of the primary hash */
+
+struct 	coda_cache *coda_nc_heap;	/* pointer to the cache entries */
+struct	coda_hash  *coda_nc_hash;	/* hash table of coda_cache pointers */
+struct	coda_lru   coda_nc_lru;		/* head of lru chain */
+
+struct coda_nc_statistics coda_nc_stat;	/* Keep various stats */
+
+/* 
+ * for testing purposes
+ */
+int coda_nc_debug = 0;
+
+/*
+ * Entry points for the CODA Name Cache
+ */
+static struct coda_cache *coda_nc_find(struct cnode *dcp, const char *name, int namelen,
+	struct ucred *cred, int hash);
+static void coda_nc_remove(struct coda_cache *cncp, enum dc_status dcstat);
+
+/*  
+ * Initialize the cache, the LRU structure and the Hash structure(s)
+ */
+
+#define TOTAL_CACHE_SIZE 	(sizeof(struct coda_cache) * coda_nc_size)
+#define TOTAL_HASH_SIZE 	(sizeof(struct coda_hash)  * coda_nc_hashsize)
+
+int coda_nc_initialized = 0;      /* Initially the cache has not been initialized */
+
+void
+coda_nc_init(void)
+{
+    int i;
+
+    /* Start with clean statistics. */
+    bzero(&coda_nc_stat, (sizeof(struct coda_nc_statistics)));
+
+#ifdef	CODA_VERBOSE
+    /* Report the live sizes: coda_nc_resize() re-runs this with new ones. */
+    printf("CODA NAME CACHE: CACHE %d, HASH TBL %d\n", coda_nc_size, coda_nc_hashsize);
+#endif
+    CODA_ALLOC(coda_nc_heap, struct coda_cache *, TOTAL_CACHE_SIZE);
+    CODA_ALLOC(coda_nc_hash, struct coda_hash *, TOTAL_HASH_SIZE);
+    /* Both LRU head links start out at LRU_PART(&coda_nc_lru). */
+    coda_nc_lru.lru_next = 
+	coda_nc_lru.lru_prev = (struct coda_cache *)LRU_PART(&coda_nc_lru);
+    
+    /* Put every heap entry on the LRU, with a null hash chain and no cnodes. */
+    for (i=0; i < coda_nc_size; i++) {	/* initialize the heap */
+	CODA_NC_LRUINS(&coda_nc_heap[i], &coda_nc_lru);
+	CODA_NC_HSHNUL(&coda_nc_heap[i]);
+	coda_nc_heap[i].cp = coda_nc_heap[i].dcp = (struct cnode *)0;
+    }
+    
+    for (i=0; i < coda_nc_hashsize; i++) {	/* initialize the hashtable */
+	CODA_NC_HSHNUL((struct coda_cache *)&coda_nc_hash[i]);
+    }
+    
+    coda_nc_initialized++;
+}
+
+/*
+ * Auxiliary routines -- shouldn't be entry points
+ */
+
+static struct coda_cache *
+coda_nc_find(dcp, name, namelen, cred, hash)
+	struct cnode *dcp;
+	const char *name;
+	int namelen;
+	struct ucred *cred;
+	int hash;
+{
+	/*
+	 * Walk hash bucket `hash' for an entry matching (dcp, name, namelen)
+	 * and, unless cred == 0, the same cred pointer.  Returns 0 on a miss.
+	 */
+	struct coda_cache *cncp;
+	int count = 1;		/* chain position, accumulated into Search_len */
+
+	CODA_NC_DEBUG(CODA_NC_FIND, 
+		    myprintf(("coda_nc_find(dcp %p, name %s, len %d, cred %p, hash %d)\n",
+			   dcp, name, namelen, cred, hash));)
+
+	for (cncp = coda_nc_hash[hash].hash_next; 
+	     cncp != (struct coda_cache *)&coda_nc_hash[hash];
+	     cncp = cncp->hash_next, count++) 
+	{
+	    /* The bucket head itself terminates the circular chain. */
+	    if ((CODA_NAMEMATCH(cncp, name, namelen, dcp)) &&
+		((cred == 0) || (cncp->cred == cred))) 
+	    { 
+		/* creds match by pointer; original note: compare cr_uid instead */
+		coda_nc_stat.Search_len += count;
+		return(cncp);
+	    }
+#ifdef	DEBUG
+	    else if (CODA_NAMEMATCH(cncp, name, namelen, dcp)) {
+		printf("coda_nc_find: name %s, new cred = %p, cred = %p\n",
+			name, cred, cncp->cred);
+		printf("nref %d, nuid %d, ngid %d // oref %d, ouid %d, ogid %d\n",
+			cred->cr_ref, cred->cr_uid, cred->cr_gid,
+			cncp->cred->cr_ref, cncp->cred->cr_uid, cncp->cred->cr_gid);
+		print_cred(cred);
+		print_cred(cncp->cred);
+	    }
+#endif
+	}
+
+	return((struct coda_cache *)0);
+}
+
+/*
+ * Enter a new (dir cnode, name) pair into the cache, updating the
+ * LRU and Hash as needed.
+ */
+void
+coda_nc_enter(dcp, name, namelen, cred, cp)
+    struct cnode *dcp;
+    const char *name;
+    int namelen;
+    struct ucred *cred;
+    struct cnode *cp;
+{
+    struct coda_cache *cncp;
+    int hash;
+    
+    if (coda_nc_use == 0)			/* Cache is off */
+	return;
+    
+    CODA_NC_DEBUG(CODA_NC_ENTER, 
+		myprintf(("Enter: dcp %p cp %p name %s cred %p \n",
+		       dcp, cp, name, cred)); )
+	
+    if (namelen > CODA_NC_NAMELEN) {
+	CODA_NC_DEBUG(CODA_NC_ENTER, 
+		    myprintf(("long name enter %s\n",name));)
+	    coda_nc_stat.long_name_enters++;	/* record stats */
+	return;
+    }
+    
+    hash = CODA_NC_HASH(name, namelen, dcp);
+    cncp = coda_nc_find(dcp, name, namelen, cred, hash);
+    if (cncp != (struct coda_cache *) 0) {	
+	coda_nc_stat.dbl_enters++;		/* duplicate entry */
+	return;
+    }
+    
+    coda_nc_stat.enters++;		/* record the enters statistic */
+    
+    /* Grab the next element in the lru chain */
+    cncp = CODA_NC_LRUGET(coda_nc_lru);
+    
+    CODA_NC_LRUREM(cncp);	/* remove it from the lists */
+    
+    if (CODA_NC_VALID(cncp)) {
+	/* Seems really ugly, but we have to decrement the appropriate
+	   hash bucket length here, so we have to find the hash bucket
+	   */
+	coda_nc_hash[CODA_NC_HASH(cncp->name, cncp->namelen, cncp->dcp)].length--;
+	
+	coda_nc_stat.lru_rm++;	/* zapped a valid entry */
+	CODA_NC_HSHREM(cncp);
+	vrele(CTOV(cncp->dcp)); 
+	vrele(CTOV(cncp->cp));
+	crfree(cncp->cred);
+    }
+    
+    /*
+     * Put a hold on the current vnodes and fill in the cache entry.
+     */
+    vref(CTOV(cp));
+    vref(CTOV(dcp));
+    cncp->dcp = dcp;
+    cncp->cp = cp;
+    cncp->namelen = namelen;
+    cncp->cred = crhold(cred);
+    
+    bcopy(name, cncp->name, (unsigned)namelen);
+    
+    /* Insert into the lru and hash chains. */
+    
+    CODA_NC_LRUINS(cncp, &coda_nc_lru);
+    CODA_NC_HSHINS(cncp, &coda_nc_hash[hash]);
+    coda_nc_hash[hash].length++;                      /* Used for tuning */
+    
+    CODA_NC_DEBUG(CODA_NC_PRINTCODA_NC, print_coda_nc(); )
+}
+
+/*
+ * Find the (dir cnode, name) pair in the cache, if its cred
+ * matches the input, return it, otherwise return 0
+ */
+struct cnode *
+coda_nc_lookup(dcp, name, namelen, cred)
+	struct cnode *dcp;
+	const char *name;
+	int namelen;
+	struct ucred *cred;
+{
+	int hash;
+	struct coda_cache *cncp;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return((struct cnode *) 0);
+
+	if (namelen > CODA_NC_NAMELEN) {
+	        CODA_NC_DEBUG(CODA_NC_LOOKUP, 
+			    myprintf(("long name lookup %s\n",name));)
+		coda_nc_stat.long_name_lookups++;		/* record stats */
+		return((struct cnode *) 0);
+	}
+
+	/* Use the hash function to locate the starting point,
+	   then the search routine to go down the list looking for
+	   the correct cred.
+ 	 */
+
+	hash = CODA_NC_HASH(name, namelen, dcp);
+	cncp = coda_nc_find(dcp, name, namelen, cred, hash);
+	if (cncp == (struct coda_cache *) 0) {
+		coda_nc_stat.misses++;			/* record miss */
+		return((struct cnode *) 0);
+	}
+
+	coda_nc_stat.hits++;
+
+	/* put this entry at the end of the LRU */
+	CODA_NC_LRUREM(cncp);
+	CODA_NC_LRUINS(cncp, &coda_nc_lru);
+
+	/* move it to the front of the hash chain */
+	/* don't need to change the hash bucket length */
+	CODA_NC_HSHREM(cncp);
+	CODA_NC_HSHINS(cncp, &coda_nc_hash[hash]);
+
+	CODA_NC_DEBUG(CODA_NC_LOOKUP, 
+		printf("lookup: dcp %p, name %s, cred %p = cp %p\n",
+			dcp, name, cred, cncp->cp); )
+
+	return(cncp->cp);
+}
+
+static void
+coda_nc_remove(cncp, dcstat)
+	struct coda_cache *cncp;
+	enum dc_status dcstat;
+{
+	/* 
+	 * remove an entry -- vrele(cncp->dcp, cp), crfree(cred),
+	 * remove it from its hash chain, and
+	 * place it at the head of the lru list.
+	 */
+        CODA_NC_DEBUG(CODA_NC_REMOVE,
+		    myprintf(("coda_nc_remove %s from parent %s\n",
+			      cncp->name, coda_f2s(&cncp->dcp->c_fid))); )	
+  	CODA_NC_HSHREM(cncp);
+
+	CODA_NC_HSHNUL(cncp);		/* have it be a null chain */
+	if ((dcstat == IS_DOWNCALL) && (vrefcnt(CTOV(cncp->dcp)) == 1)) {
+		cncp->dcp->c_flags |= C_PURGING;
+	}
+	vrele(CTOV(cncp->dcp)); 
+
+	if ((dcstat == IS_DOWNCALL) && (vrefcnt(CTOV(cncp->cp)) == 1)) {
+		cncp->cp->c_flags |= C_PURGING;
+	}
+	vrele(CTOV(cncp->cp)); 
+
+	crfree(cncp->cred); 
+	bzero(DATA_PART(cncp),DATA_SIZE);
+
+	/* Put the null entry just after the least-recently-used entry */
+	/* LRU_TOP adjusts the pointer to point to the top of the structure. */
+	CODA_NC_LRUREM(cncp);
+	CODA_NC_LRUINS(cncp, LRU_TOP(coda_nc_lru.lru_prev));
+}
+
+/*
+ * Remove all entries with a parent which has the input fid.
+ */
+void
+coda_nc_zapParentfid(fid, dcstat)
+	CodaFid *fid;
+	enum dc_status dcstat;
+{
+	/* To get to a specific fid, we might either have another hashing
+	   function or do a sequential search through the cache for the
+	   appropriate entries. The latter may be acceptable since I don't
+	   think callbacks or whatever Case 1 covers are frequent occurrences.
+	 */
+	struct coda_cache *cncp, *ncncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;	
+
+	CODA_NC_DEBUG(CODA_NC_ZAPPFID, 
+		      myprintf(("ZapParent: fid %s\n", coda_f2s(fid))); )
+
+	coda_nc_stat.zapPfids++;
+
+	for (i = 0; i < coda_nc_hashsize; i++) {
+
+		/*
+		 * Need to save the hash_next pointer in case we remove the
+		 * entry. remove causes hash_next to point to itself.
+		 */
+
+		for (cncp = coda_nc_hash[i].hash_next; 
+		     cncp != (struct coda_cache *)&coda_nc_hash[i];
+		     cncp = ncncp) {
+			ncncp = cncp->hash_next;
+			if (coda_fid_eq(&(cncp->dcp->c_fid), fid)) {
+			        coda_nc_hash[i].length--;      /* Used for tuning */
+				coda_nc_remove(cncp, dcstat); 
+			}
+		}
+	}
+}
+
+
+/*
+ * Remove all entries which have the same fid as the input
+ */
+void
+coda_nc_zapfid(fid, dcstat)
+	CodaFid *fid;
+	enum dc_status dcstat;
+{
+	/* See comment for zapParentfid. This routine will be used
+	   if attributes are being cached. 
+	 */
+	struct coda_cache *cncp, *ncncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_ZAPFID, 
+		      myprintf(("Zapfid: fid %s\n", coda_f2s(fid))); )
+
+	coda_nc_stat.zapFids++;
+
+	for (i = 0; i < coda_nc_hashsize; i++) {
+		for (cncp = coda_nc_hash[i].hash_next; 
+		     cncp != (struct coda_cache *)&coda_nc_hash[i];
+		     cncp = ncncp) {
+			ncncp = cncp->hash_next;
+			if (coda_fid_eq(&cncp->cp->c_fid, fid)) {
+			    coda_nc_hash[i].length--;     /* Used for tuning */
+			    coda_nc_remove(cncp, dcstat); 
+			}
+		}
+	}
+}
+
+/* 
+ * Remove all entries which match the fid and the cred
+ */
+void
+coda_nc_zapvnode(fid, cred, dcstat)	
+	CodaFid *fid;
+	struct ucred *cred;
+	enum dc_status dcstat;
+{
+	/* See comment for zapfid. I don't think that one would ever
+	   want to zap a file with a specific cred from the kernel.
+	   We'll leave this one unimplemented.
+	 */
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_ZAPVNODE,
+		      myprintf(("Zapvnode: fid %s cred %p\n",
+				coda_f2s(fid), cred)); )
+
+ 
+
+}
+
+/*
+ * Remove all entries which have the (dir vnode, name) pair
+ */
+void
+coda_nc_zapfile(dcp, name, namelen)
+	struct cnode *dcp;
+	const char *name;
+	int namelen;
+{
+	/* use the hash function to locate the file, then zap all
+ 	   entries of it regardless of the cred.
+	 */
+	struct coda_cache *cncp;
+	int hash;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_ZAPFILE, 
+		myprintf(("Zapfile: dcp %p name %s \n",
+			  dcp, name)); )
+
+	if (namelen > CODA_NC_NAMELEN) {
+		coda_nc_stat.long_remove++;		/* record stats */
+		return;
+	}
+
+	coda_nc_stat.zapFile++;
+
+	hash = CODA_NC_HASH(name, namelen, dcp);
+	cncp = coda_nc_find(dcp, name, namelen, 0, hash);
+
+	while (cncp) {
+	  coda_nc_hash[hash].length--;                 /* Used for tuning */
+
+	  coda_nc_remove(cncp, NOT_DOWNCALL);
+	  cncp = coda_nc_find(dcp, name, namelen, 0, hash);
+	}
+}
+
+/* 
+ * Remove all the entries for a particular user. Used when tokens expire.
+ * A user is determined by the user id in the entry's cred (cr_uid).
+ */
+void
+coda_nc_purge_user(uid, dcstat)
+	uid_t	uid;
+	enum dc_status  dcstat;
+{
+	/* 
+	 * I think the best approach is to go through the entire cache
+	 * via HASH or whatever and zap all entries which match the
+	 * input cred. Or just flush the whole cache.  It might be
+	 * best to go through on basis of LRU since cache will almost
+	 * always be full and LRU is more straightforward.  
+	 */
+
+	struct coda_cache *cncp, *ncncp;
+	int hash;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_PURGEUSER, 
+		myprintf(("ZapDude: uid %x\n", uid)); )
+	coda_nc_stat.zapUsers++;
+
+	for (cncp = CODA_NC_LRUGET(coda_nc_lru);
+	     cncp != (struct coda_cache *)(&coda_nc_lru);
+	     cncp = ncncp) {
+		ncncp = CODA_NC_LRUGET(*cncp);
+
+		if ((CODA_NC_VALID(cncp)) &&
+		   ((cncp->cred)->cr_uid == uid)) {
+		        /* Seems really ugly, but we have to decrement the appropriate
+			   hash bucket length here, so we have to find the hash bucket
+			   */
+		        hash = CODA_NC_HASH(cncp->name, cncp->namelen, cncp->dcp);
+			coda_nc_hash[hash].length--;     /* For performance tuning */
+
+			coda_nc_remove(cncp, dcstat); 
+		}
+	}
+}
+
+/*
+ * Flush the entire name cache. In response to a flush of the Venus cache.
+ */
+void
+coda_nc_flush(dcstat)
+	enum dc_status dcstat;
+{
+	/* One option is to deallocate the current name cache and
+	   call init to start again. Or just deallocate, then rebuild.
+	   Or again, we could just go through the array and zero the 
+	   appropriate fields. 
+	 */
+	
+	/* 
+	 * Go through the whole lru chain and kill everything as we go.
+	 * I don't use remove since that would rebuild the lru chain
+	 * as it went and that seemed unnecessary.
+	 */
+	struct coda_cache *cncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	coda_nc_stat.Flushes++;
+
+	for (cncp = CODA_NC_LRUGET(coda_nc_lru);
+	     cncp != (struct coda_cache *)&coda_nc_lru;
+	     cncp = CODA_NC_LRUGET(*cncp)) {
+		if (CODA_NC_VALID(cncp)) {
+
+			CODA_NC_HSHREM(cncp);	/* only zero valid nodes */
+			CODA_NC_HSHNUL(cncp);
+			if ((dcstat == IS_DOWNCALL) 
+			    && (vrefcnt(CTOV(cncp->dcp)) == 1))
+			{
+				cncp->dcp->c_flags |= C_PURGING;
+			}
+			vrele(CTOV(cncp->dcp)); 
+
+			ASSERT_VOP_LOCKED(CTOV(cncp->cp), "coda_nc_flush");
+			if (CTOV(cncp->cp)->v_vflag & VV_TEXT) {
+			    if (coda_vmflush(cncp->cp))
+				CODADEBUG(CODA_FLUSH, 
+			myprintf(("coda_nc_flush: %s busy\n",
+				 coda_f2s(&cncp->cp->c_fid))); )
+			}
+
+			if ((dcstat == IS_DOWNCALL) 
+			    && (vrefcnt(CTOV(cncp->cp)) == 1))
+			{
+				cncp->cp->c_flags |= C_PURGING;
+			}
+			vrele(CTOV(cncp->cp));  
+
+			crfree(cncp->cred); 
+			bzero(DATA_PART(cncp),DATA_SIZE);
+		}
+	}
+
+	for (i = 0; i < coda_nc_hashsize; i++)
+	  coda_nc_hash[i].length = 0;
+}
+
+/*
+ * Debugging routines
+ */
+
+/* 
+ * This routine should print out all the hash chains to the console.
+ */
+void
+print_coda_nc(void)
+{
+	int hash;
+	struct coda_cache *cncp;
+
+	for (hash = 0; hash < coda_nc_hashsize; hash++) {
+		myprintf(("\nhash %d\n",hash));
+
+		for (cncp = coda_nc_hash[hash].hash_next; 
+		     cncp != (struct coda_cache *)&coda_nc_hash[hash];
+		     cncp = cncp->hash_next) {
+			myprintf(("cp %p dcp %p cred %p name %s\n",
+				  cncp->cp, cncp->dcp,
+				  cncp->cred, cncp->name));
+		     }
+	}
+}
+
+void
+coda_nc_gather_stats(void)
+{
+    int i, max = 0, sum = 0, temp, zeros = 0, ave, n;
+
+	for (i = 0; i < coda_nc_hashsize; i++) {
+	  if (coda_nc_hash[i].length) {
+	    sum += coda_nc_hash[i].length;
+	  } else {
+	    zeros++;
+	  }
+
+	  if (coda_nc_hash[i].length > max)
+	    max = coda_nc_hash[i].length;
+	}
+
+	/*
+	 * When computing the Arithmetic mean, only count slots which 
+	 * are not empty in the distribution.
+	 */
+        coda_nc_stat.Sum_bucket_len = sum;
+        coda_nc_stat.Num_zero_len = zeros;
+        coda_nc_stat.Max_bucket_len = max;
+
+	if ((n = coda_nc_hashsize - zeros) > 0) 
+	  ave = sum / n;
+	else
+	  ave = 0;
+
+	sum = 0;
+	for (i = 0; i < coda_nc_hashsize; i++) {
+	  if (coda_nc_hash[i].length) {
+	    temp = coda_nc_hash[i].length - ave;
+	    sum += temp * temp;
+	  }
+	}
+        coda_nc_stat.Sum2_bucket_len = sum;
+}
+
+/*
+ * Change the hash and cache sizes at run time.  Both sizes must be positive
+ * and even, otherwise EINVAL is returned and nothing changes.  Only use this
+ * in controlled environments: nothing locks out other users of the cache
+ * while it is being rebuilt (beyond turning the cache off).
+ */
+int
+coda_nc_resize(hashsize, heapsize, dcstat)
+     int hashsize, heapsize;
+     enum dc_status dcstat;
+{
+    /* Zero or negative sizes would leave coda_nc_init() with empty tables. */
+    if ((hashsize <= 0) || (heapsize <= 0) || (hashsize % 2) || (heapsize % 2)) {
+	return(EINVAL);                    /* Illegal hash or cache sizes */
+    }
+
+    /* Flush while still enabled: coda_nc_flush() is a no-op once coda_nc_use
+     * is 0, and the references held by the old heap would never be released. */
+    coda_nc_flush(dcstat);                 /* free any cnodes in the cache */
+    coda_nc_use = 0;                       /* Turn the cache off */
+
+    /* WARNING: free must happen *before* size is reset */
+    CODA_FREE(coda_nc_heap,TOTAL_CACHE_SIZE);
+    CODA_FREE(coda_nc_hash,TOTAL_HASH_SIZE);
+    coda_nc_hashsize = hashsize;
+    coda_nc_size = heapsize;
+
+    coda_nc_init();                        /* Set up a cache with the new size */
+    coda_nc_use = 1;                       /* Turn the cache back on */
+    return(0);
+}
+
+#ifdef	DEBUG
+char coda_nc_name_buf[CODA_MAXNAMLEN+1];
+
+void
+coda_nc_name(struct cnode *cp)
+{
+	struct coda_cache *cncp, *ncncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	for (i = 0; i < coda_nc_hashsize; i++) {
+		for (cncp = coda_nc_hash[i].hash_next; 
+		     cncp != (struct coda_cache *)&coda_nc_hash[i];
+		     cncp = ncncp) {
+			ncncp = cncp->hash_next;
+			if (cncp->cp == cp) {
+				bcopy(cncp->name, coda_nc_name_buf, cncp->namelen);
+				coda_nc_name_buf[cncp->namelen] = 0;
+				printf(" is %s (%p,%p)@%p",
+					coda_nc_name_buf, cncp->cp, cncp->dcp, cncp);
+			}
+
+		}
+	}
+}
+#endif
--- /dev/null
+++ sys/fs/coda/coda_venus.h
@@ -0,0 +1,132 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_venus.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_venus.h,v 1.10 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+int
+venus_root(void *mdp,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid);
+
+int
+venus_open(void *mdp, CodaFid *fid, int flag,
+	struct ucred *cred, struct proc *p,
+/*out*/	struct vnode **vp);
+
+int
+venus_close(void *mdp, CodaFid *fid, int flag,
+	struct ucred *cred, struct proc *p);
+
+void
+venus_read(void);
+
+void
+venus_write(void);
+
+int
+venus_ioctl(void *mdp, CodaFid *fid,
+	int com, int flag, caddr_t data,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_getattr(void *mdp, CodaFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	struct vattr *vap);
+
+int
+venus_setattr(void *mdp, CodaFid *fid, struct vattr *vap,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_access(void *mdp, CodaFid *fid, int mode,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_readlink(void *mdp, CodaFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	char **str, int *len);
+
+int
+venus_fsync(void *mdp, CodaFid *fid, struct proc *p);
+
+int
+venus_lookup(void *mdp, CodaFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, int *vtype);
+
+int
+venus_create(void *mdp, CodaFid *fid,
+    	const char *nm, int len, int exclusive, int mode, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, struct vattr *attr);
+
+int
+venus_remove(void *mdp, CodaFid *fid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_link(void *mdp, CodaFid *fid, CodaFid *tfid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_rename(void *mdp, CodaFid *fid, CodaFid *tfid,
+        const char *nm, int len, const char *tnm, int tlen,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_mkdir(void *mdp, CodaFid *fid,
+    	const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, struct vattr *ova);
+
+int
+venus_rmdir(void *mdp, CodaFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_symlink(void *mdp, CodaFid *fid,
+        const char *lnm, int llen, const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p);
+
+int
+venus_readdir(void *mdp, CodaFid *fid,
+    	int count, int offset,
+	struct ucred *cred, struct proc *p,
+/*out*/	char *buffer, int *len);
+
+int
+venus_fhtovp(void *mdp, CodaFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, int *vtype);
--- /dev/null
+++ sys/fs/coda/coda_namecache.h
@@ -0,0 +1,196 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_namecache.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_namecache.h,v 1.11.4.1 2008/01/23 12:09:43 rwatson Exp $
+ * 
+ */
+
+/*-
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+#ifndef _CODA_NC_HEADER_
+#define _CODA_NC_HEADER_
+
+/*
+ * Coda constants
+ */
+#define CODA_NC_NAMELEN	15		/* longest name stored in cache */
+#define CODA_NC_CACHESIZE 256		/* Default cache size */
+#define CODA_NC_HASHSIZE	64		/* Should be a power of 2: CODA_NC_HASH masks with (coda_nc_hashsize-1) */
+
+/*
+ * Hash function for the primary hash.
+ */
+
+/* 
+ * First try -- (first + last letters + length + (int)cp) mod size
+ * 2nd try -- same, except dir fid.vnode instead of cp
+ *
+ * The result is reduced with "& (coda_nc_hashsize-1)", so it is only a
+ * true "mod size" when coda_nc_hashsize is a power of two.  namelen must
+ * be at least 1 because name[namelen-1] is read.
+ */
+
+#define CODA_NC_HASH(name, namelen, cp) \
+	(((name)[0] + ((name)[(namelen)-1]<<4) + (namelen) + (((int)(intptr_t)(cp))>>8)) & (coda_nc_hashsize-1))
+
+/*
+ * True when cache entry cncp_ holds exactly the name nm_/nmlen_ under
+ * parent cnode dcp_.  The parameter names deliberately differ from the
+ * struct coda_cache member names (name, namelen, dcp): with identical
+ * names the preprocessor would also rewrite the member accesses, so the
+ * macro would only work for callers whose variables happen to carry
+ * those exact names.
+ */
+#define CODA_NAMEMATCH(cncp_, nm_, nmlen_, dcp_) \
+	(((nmlen_) == (cncp_)->namelen) && ((dcp_) == (cncp_)->dcp) && \
+		 (bcmp((cncp_)->name,(nm_),(nmlen_)) == 0))
+
+/*
+ * Functions to modify the hash and lru chains.
+ * insque and remque assume that the pointers are the first thing
+ * in the list node, thus the trickery for lru.
+ *
+ * All of these expand to a single expression without a trailing
+ * semicolon, so each use must be terminated by the caller, as with an
+ * ordinary function call; that keeps them safe inside if/else bodies.
+ */
+
+#define CODA_NC_HSHINS(elem, pred)	insque(elem,pred)
+#define CODA_NC_HSHREM(elem)		remque(elem)
+/* Make elem a self-linked (empty) hash chain. */
+#define CODA_NC_HSHNUL(elem)		((elem)->hash_next = \
+					(elem)->hash_prev = (elem))
+
+#define CODA_NC_LRUINS(elem, pred)	insque(LRU_PART(elem), LRU_PART(pred))
+#define CODA_NC_LRUREM(elem)		remque(LRU_PART(elem))
+#define CODA_NC_LRUGET(lruhead)		LRU_TOP((lruhead).lru_prev)
+
+/* An entry counts as valid while its parent cnode pointer is non-null. */
+#define CODA_NC_VALID(cncp)	((cncp)->dcp != (struct cnode *)0)
+ 
+/*
+ * Pointer adjustments between the views of a cache entry:
+ *   LRU_PART  skips the two hash pointers, landing on lru_next;
+ *   LRU_TOP   undoes LRU_PART, going from the lru links back to the entry;
+ *   DATA_PART skips all four link pointers, landing on the payload;
+ *   DATA_SIZE is the size of that payload.
+ */
+#define LRU_PART(cncp)			((struct coda_cache *) \
+				((char *)(cncp) + (2*sizeof(struct coda_cache *))))
+#define LRU_TOP(cncp)				((struct coda_cache *) \
+			((char *)(cncp) - (2*sizeof(struct coda_cache *))))
+#define DATA_PART(cncp)				((struct coda_cache *) \
+			((char *)(cncp) + (4*sizeof(struct coda_cache *))))
+#define DATA_SIZE	(sizeof(struct coda_cache)-(4*sizeof(struct coda_cache *)))
+
+/*
+ * Structure for an element in the CODA Name Cache.
+ * NOTE: I use the position of arguments and their size in the
+ * implementation of the functions CODA_NC_LRUINS, CODA_NC_LRUREM, and
+ * DATA_PART.
+ *
+ * Concretely: LRU_PART() adds 2*sizeof(pointer) to reach lru_next and
+ * DATA_PART() adds 4*sizeof(pointer) to reach cp, so the four link
+ * pointers must stay first and in this order.
+ */
+
+struct coda_cache {	
+	struct coda_cache	*hash_next,*hash_prev;	/* Hash list */
+	struct coda_cache	*lru_next, *lru_prev;	/* LRU list */
+	struct cnode	*cp;			/* vnode of the file */
+	struct cnode	*dcp;			/* parent's cnode */
+	struct ucred	*cred;			/* user credentials */
+	char		name[CODA_NC_NAMELEN];	/* segment name; presumably not NUL-terminated, use namelen */
+	int		namelen;		/* length of name */
+};
+
+/*
+ * Head of the LRU chain.  The two dummy pointers occupy the slots that
+ * hash_next/hash_prev take in struct coda_cache, so that lru_next and
+ * lru_prev sit at the same offset in both structures and LRU_PART() can
+ * be applied to the head as well as to entries.
+ */
+struct	coda_lru {		/* Start of LRU chain */
+	char *dummy1, *dummy2;			/* place holders */
+	struct coda_cache *lru_next, *lru_prev;   /* position of pointers is important */
+};
+
+
+/*
+ * Head of one hash chain (one per bucket).  The chain code casts a
+ * pointer to this structure to struct coda_cache * and uses it as the
+ * list sentinel, which is why the two chain pointers have to come first.
+ */
+struct coda_hash {		/* Start of Hash chain */
+	struct coda_cache *hash_next, *hash_prev; /* NOTE: chain pointers must be first */
+        int length;                             /* used for tuning purposes */
+};
+
+
+/* 
+ * Symbols to aid in debugging the namecache code. Assumes the existence
+ * of the variable coda_nc_debug, which is defined in cfs_namecache.c
+ * (NOTE(review): that file name looks stale; presumably coda_namecache.c
+ * now).
+ *
+ * STMT is executed only when bit N of coda_nc_debug is set.  The
+ * expansion is a braced block, not a do { } while (0), so it does not
+ * need (and inside an if/else must not be given) a trailing semicolon.
+ */
+#define CODA_NC_DEBUG(N, STMT)     { if (coda_nc_debug & (1 <<N)) { STMT } }
+
+/* Prototypes of functions exported within cfs */
+void coda_nc_init(void);
+void coda_nc_enter(struct cnode *, const char *, int, struct ucred *, struct cnode *);
+struct cnode *coda_nc_lookup(struct cnode *, const char *, int, struct ucred *);
+
+/* Invalidation entry points; the enum dc_status argument is passed through. */
+void coda_nc_zapParentfid(CodaFid *, enum dc_status);
+void coda_nc_zapfid(CodaFid *, enum dc_status);
+void coda_nc_zapvnode(CodaFid *, struct ucred *, enum dc_status);
+void coda_nc_zapfile(struct cnode *, const char *, int);
+void coda_nc_purge_user(uid_t, enum dc_status);
+void coda_nc_flush(enum dc_status);
+
+/* Debugging, statistics and tuning. */
+void print_coda_nc(void);
+void coda_nc_gather_stats(void);
+int  coda_nc_resize(int, int, enum dc_status);	/* EINVAL on bad sizes, else 0 */
+void coda_nc_name(struct cnode *cp);		/* only defined when DEBUG is set */
+
+/*
+ * Global variables tracking and controlling Coda namecache operation.
+ */
+extern int coda_nc_debug;		/* Set to enable debugging printfs */
+extern int coda_nc_initialized;		/* Set if cache has been initialized */
+extern int coda_nc_use;			/* Indicate use of CODA Name Cache */
+
+/*
+ * Structure to contain statistics on the cache usage
+ *
+ * All members are plain unsigned counters.  The *_bucket_len members
+ * and Num_zero_len describe the hash chains; Sum2_bucket_len is filled
+ * in by the statistics-gathering code with the sum, over non-empty
+ * buckets, of the squared difference between the bucket length and the
+ * average length.  The meaning of the remaining counters is presumably
+ * as their names suggest -- confirm against coda_namecache.c.
+ */
+
+struct coda_nc_statistics {
+	unsigned	hits;
+	unsigned	misses;
+	unsigned	enters;
+	unsigned	dbl_enters;
+	unsigned	long_name_enters;
+	unsigned	long_name_lookups;
+	unsigned	long_remove;
+	unsigned	lru_rm;
+	unsigned	zapPfids;
+	unsigned	zapFids;
+	unsigned	zapFile;
+	unsigned	zapUsers;
+	unsigned	Flushes;
+	unsigned        Sum_bucket_len;
+	unsigned        Sum2_bucket_len;
+	unsigned        Max_bucket_len;
+	unsigned        Num_zero_len;
+	unsigned        Search_len;
+};
+
+/*
+ * Small integer codes, one per namecache operation.
+ * NOTE(review): presumably these are the N argument of CODA_NC_DEBUG(),
+ * i.e. bit numbers within coda_nc_debug -- confirm against the callers.
+ */
+#define CODA_NC_FIND		((u_long) 1)
+#define CODA_NC_REMOVE		((u_long) 2)
+#define CODA_NC_INIT		((u_long) 3)
+#define CODA_NC_ENTER		((u_long) 4)
+#define CODA_NC_LOOKUP		((u_long) 5)
+#define CODA_NC_ZAPPFID		((u_long) 6)
+#define CODA_NC_ZAPFID		((u_long) 7)
+#define CODA_NC_ZAPVNODE		((u_long) 8)
+#define CODA_NC_ZAPFILE		((u_long) 9)
+#define CODA_NC_PURGEUSER		((u_long) 10)
+#define CODA_NC_FLUSH		((u_long) 11)
+#define CODA_NC_PRINTCODA_NC	((u_long) 12)
+#define CODA_NC_PRINTSTATS	((u_long) 13)
+
+#endif
--- /dev/null
+++ sys/fs/coda/coda_vfsops.c
@@ -0,0 +1,512 @@
+/*-
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ *  	@(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ */
+/*-
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/coda/coda_vfsops.c,v 1.67.4.1 2008/01/23 12:09:43 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+
+#include <fs/coda/coda.h>
+#include <fs/coda/cnode.h>
+#include <fs/coda/coda_vfsops.h>
+#include <fs/coda/coda_venus.h>
+#include <fs/coda/coda_subr.h>
+#include <fs/coda/coda_opstats.h>
+
+/* malloc(9) type used for Coda allocations. */
+MALLOC_DEFINE(M_CODA, "coda", "Various Coda Structures");
+
+int codadebug = 0;
+/* When non-zero, ENTRY prints the name of each vfs operation entered. */
+int coda_vfsop_print_entry = 0;
+/*
+ * NOTE: expands to a bare "if" statement, so it must only be used as a
+ * statement of its own ("ENTRY;"), never directly before an "else".
+ */
+#define ENTRY    if(coda_vfsop_print_entry) myprintf(("Entered %s\n",__func__))
+
+/* vnode of the Coda control object; set in coda_mount(), cleared in coda_unmount(). */
+struct vnode *coda_ctlvp;
+
+/* structure to keep statistics of internally generated/satisfied calls */
+
+struct coda_op_stats coda_vfsopstats[CODA_VFSOPS_SIZE];
+
+/* Counter bumps on the per-operation statistics slot "op". */
+#define MARK_ENTRY(op) (coda_vfsopstats[op].entries++)
+#define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++)
+#define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++)
+#define MARK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++)
+
+/*
+ * Reset the vfs operation statistics table: every slot gets its own
+ * index as opcode and all four counters cleared.  Always returns 0.
+ */
+int
+coda_vfsopstats_init(void)
+{
+	struct coda_op_stats *slot;
+	int op;
+
+	for (op = 0; op < CODA_VFSOPS_SIZE; op++) {
+		slot = &coda_vfsopstats[op];
+
+		slot->opcode = op;
+		slot->entries = 0;
+		slot->sat_intrn = 0;
+		slot->unsat_intrn = 0;
+		slot->gen_intrn = 0;
+	}
+
+	return (0);
+}
+
+/* Mount options accepted by coda_mount(); anything else is rejected. */
+static const char *coda_opts[] = { "from", NULL };
+/*
+ * cfs mount vfsop
+ * Set up mount info record and attach it to vfs struct.
+ *
+ * The "from" option names the device to mount from.  It must resolve to
+ * a character device for which dev2coda_mntinfo() finds a mount info
+ * record and whose communication channel is open (VC_OPEN).
+ *
+ * Returns 0 on success, otherwise: EINVAL for an unknown option, the
+ * error from vfs_getopts() or namei(), EBUSY if vfsp is already a
+ * mounted Coda file system, ENXIO if "from" is not a character device
+ * or not a Coda device, ENODEV if the channel is not open.
+ *
+ * NOTE: both statistics tables are re-initialized on every call, before
+ * any of the checks after option parsing.
+ */
+/*ARGSUSED*/
+int
+coda_mount(struct mount *vfsp, struct thread *td)
+{
+    struct vnode *dvp;
+    struct cnode *cp;
+    struct cdev *dev;
+    struct coda_mntinfo *mi;
+    struct vnode *rootvp;
+    CodaFid rootfid = INVAL_FID;
+    CodaFid ctlfid = CTL_FID;
+    int error;
+    struct nameidata ndp;
+    ENTRY;
+    char *from;
+
+    if (vfs_filteropt(vfsp->mnt_optnew, coda_opts))
+	return (EINVAL);
+
+    from = vfs_getopts(vfsp->mnt_optnew, "from", &error);
+    if (error)
+	return (error);
+
+    coda_vfsopstats_init();
+    coda_vnodeopstats_init();
+    
+    MARK_ENTRY(CODA_MOUNT_STATS);
+    if (CODA_MOUNTED(vfsp)) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return(EBUSY);
+    }
+    
+    /* Validate mount device.  Similar to getmdev(). */
+    NDINIT(&ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, from, td);
+    error = namei(&ndp);
+    /* dvp is fetched before the error test but only used after it. */
+    dvp = ndp.ni_vp;
+
+    if (error) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return (error);
+    }
+    if (dvp->v_type != VCHR) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	vrele(dvp);
+	NDFREE(&ndp, NDF_ONLY_PNBUF);
+	return(ENXIO);
+    }
+    /* Only the device is needed from here on; drop the vnode reference. */
+    dev = dvp->v_rdev;
+    vrele(dvp);
+    NDFREE(&ndp, NDF_ONLY_PNBUF);
+
+    /*
+     * Initialize the mount record and link it to the vfs struct
+     */
+    mi = dev2coda_mntinfo(dev);
+    if (!mi) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	printf("Coda mount: %s is not a cfs device\n", from);
+	return(ENXIO);
+    }
+    
+    if (!VC_OPEN(&mi->mi_vcomm)) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return(ENODEV);
+    }
+    
+    /* No initialization (here) of mi_vcomm! */
+    vfsp->mnt_data = mi;
+    vfs_getnewfsid (vfsp);
+
+    mi->mi_vfsp = vfsp;
+    mi->mi_started = 0;			/* XXX See coda_root() */
+    
+    /*
+     * Make a root vnode to placate the Vnode interface, but don't
+     * actually make the CODA_ROOT call to venus until the first call
+     * to coda_root in case a server is down while venus is starting.
+     */
+    cp = make_coda_node(&rootfid, vfsp, VDIR);
+    rootvp = CTOV(cp);
+    rootvp->v_vflag |= VV_ROOT;
+	
+    /* The control object gets its own cnode, kept in the global coda_ctlvp. */
+    cp = make_coda_node(&ctlfid, vfsp, VREG);
+    coda_ctlvp = CTOV(cp);
+
+    /* Add vfs and rootvp to chain of vfs hanging off mntinfo */
+    mi->mi_vfsp = vfsp;
+    mi->mi_rootvp = rootvp;
+    
+    vfs_mountedfrom(vfsp, from);
+    /* error is currently guaranteed to be zero, but in case some
+       code changes... */
+    CODADEBUG(1,
+	     myprintf(("coda_omount returned %d\n",error)););
+    if (error)
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+    else
+	MARK_INT_SAT(CODA_MOUNT_STATS);
+    
+    return(error);
+}
+
+/*
+ * cfs unmount vfsop.
+ *
+ * Returns EINVAL if vfsp is not a mounted Coda file system or is not
+ * the one recorded in its mount info, EBUSY while the root cnode is not
+ * marked as unmounting, and 0 otherwise.
+ *
+ * NOTE(review): the EBUSY return and the final EINVAL return do not
+ * update the failure statistics, unlike the first EINVAL.
+ */
+int
+coda_unmount(vfsp, mntflags, td)
+    struct mount *vfsp;
+    int mntflags;
+    struct thread *td;
+{
+    struct coda_mntinfo *mi = vftomi(vfsp);
+    int active, error = 0;
+    
+    ENTRY;
+    MARK_ENTRY(CODA_UMOUNT_STATS);
+    if (!CODA_MOUNTED(vfsp)) {
+	MARK_INT_FAIL(CODA_UMOUNT_STATS);
+	return(EINVAL);
+    }
+    
+    if (mi->mi_vfsp == vfsp) {	/* We found the victim */
+	if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp)))
+	    return (EBUSY); 	/* Venus is still running */
+
+#ifdef	DEBUG
+	printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp));
+#endif
+	/* Drop the references on the root and control vnodes taken at mount. */
+	vrele(mi->mi_rootvp);
+	mi->mi_rootvp = NULL;
+	vrele(coda_ctlvp);
+	coda_ctlvp = NULL;
+	/* "active" is only consumed by the CODA_VERBOSE printf below. */
+	active = coda_kill(vfsp, NOT_DOWNCALL);
+	error = vflush(mi->mi_vfsp, 0, FORCECLOSE, td);
+#ifdef CODA_VERBOSE
+	printf("coda_unmount: active = %d, vflush active %d\n", active, error);
+#endif
+	/* The vflush() result is discarded: the unmount always succeeds here. */
+	error = 0;
+	/* I'm going to take this out to allow lookups to go through. I'm
+	 * not sure it's important anyway. -- DCS 2/2/94
+	 */
+	/* vfsp->VFS_DATA = NULL; */
+
+	/* No more vfsp's to hold onto */
+	mi->mi_vfsp = NULL;
+
+	/* error was just cleared, so only the "else" branch can run. */
+	if (error)
+	    MARK_INT_FAIL(CODA_UMOUNT_STATS);
+	else
+	    MARK_INT_SAT(CODA_UMOUNT_STATS);
+
+	return(error);
+    }
+    return (EINVAL);
+}
+
+/*
+ * find root of cfs
+ *
+ * On success *vpp is the root vnode, referenced and exclusively locked.
+ * The cached root is returned directly when its fid is no longer the
+ * invalid placeholder, or on the very first call (mi_started == 0).
+ * Otherwise Venus is asked for the root fid via venus_root():
+ *   - success: the root cnode is re-keyed to the new fid and returned;
+ *   - ENODEV or EINTR: the placeholder root is returned anyway and the
+ *     error is turned into 0, so that unmount stays possible;
+ *   - any other error: returned as is, *vpp is left untouched.
+ */
+int
+coda_root(vfsp, flags, vpp, td)
+	struct mount *vfsp;
+	int flags;
+	struct vnode **vpp;
+	struct thread *td;
+{
+    struct coda_mntinfo *mi = vftomi(vfsp);
+    struct vnode **result;
+    int error;
+    struct proc *p = td->td_proc;
+    CodaFid VFid;
+    static const CodaFid invalfid = INVAL_FID;
+ 
+    ENTRY;
+    MARK_ENTRY(CODA_ROOT_STATS);
+    /* NOTE(review): result is assigned here and never read afterwards. */
+    result = NULL;
+    
+    if (vfsp == mi->mi_vfsp) {
+	/*
+	 * Cache the root across calls. We only need to pass the request
+	 * on to Venus if the root vnode is the dummy we installed in
+	 * coda_omount() with all c_fid members zeroed.
+	 *
+	 * XXX In addition, we assume that the first call to coda_root()
+	 * is from vfs_omount()
+	 * (before the call to checkdirs()) and return the dummy root
+	 * node to avoid a deadlock. This bug is fixed in the Coda CVS
+	 * repository but not in any released versions as of 6 Mar 2003.
+	 */
+	if (memcmp(&VTOC(mi->mi_rootvp)->c_fid, &invalfid,
+	    sizeof(CodaFid)) != 0 || mi->mi_started == 0)
+	    { /* Found valid root. */
+		*vpp = mi->mi_rootvp;
+		mi->mi_started = 1;
+
+		/* On Mach, this is vref.  On NetBSD, VOP_LOCK */
+#if	1
+		vref(*vpp);
+		vn_lock(*vpp, LK_EXCLUSIVE, td);
+#else
+		vget(*vpp, LK_EXCLUSIVE, td);
+#endif
+		MARK_INT_SAT(CODA_ROOT_STATS);
+		return(0);
+	    }
+    }
+
+    error = venus_root(vftomi(vfsp), td->td_ucred, p, &VFid);
+
+    if (!error) {
+	/*
+	 * Save the new rootfid in the cnode, and rehash the cnode into the
+	 * cnode hash with the new fid key.
+	 */
+	coda_unsave(VTOC(mi->mi_rootvp));
+	VTOC(mi->mi_rootvp)->c_fid = VFid;
+	coda_save(VTOC(mi->mi_rootvp));
+
+	*vpp = mi->mi_rootvp;
+#if	1
+	vref(*vpp);
+	vn_lock(*vpp, LK_EXCLUSIVE, td);
+#else
+	vget(*vpp, LK_EXCLUSIVE, td);
+#endif
+
+	MARK_INT_SAT(CODA_ROOT_STATS);
+	goto exit;
+    } else if (error == ENODEV || error == EINTR) {
+	/* Gross hack here! */
+	/*
+	 * If Venus fails to respond to the CODA_ROOT call, coda_call returns
+	 * ENODEV. Return the uninitialized root vnode to allow vfs
+	 * operations such as unmount to continue. Without this hack,
+	 * there is no way to do an unmount if Venus dies before a 
+	 * successful CODA_ROOT call is done. All vnode operations 
+	 * will fail.
+	 */
+	*vpp = mi->mi_rootvp;
+#if	1
+	vref(*vpp);
+	vn_lock(*vpp, LK_EXCLUSIVE, td);
+#else
+	vget(*vpp, LK_EXCLUSIVE, td);
+#endif
+
+	MARK_INT_FAIL(CODA_ROOT_STATS);
+	error = 0;
+	goto exit;
+    } else {
+	CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); );
+	MARK_INT_FAIL(CODA_ROOT_STATS);
+		
+	goto exit;
+    }
+
+    /* Every branch above jumps here; the label is the single exit point. */
+ exit:
+    return(error);
+}
+
+/*
+ * Get filesystem statistics.
+ *
+ * Nothing is asked of Venus: for a mounted Coda file system the statfs
+ * buffer is filled with fixed placeholder values and 0 is returned;
+ * otherwise EINVAL is returned and sbp is left untouched.
+ */
+int
+coda_nb_statfs(struct mount *vfsp, struct statfs *sbp, struct thread *td)
+{
+    ENTRY;
+/*  MARK_ENTRY(CODA_STATFS_STATS); */
+
+    if (CODA_MOUNTED(vfsp)) {
+	/* XXX - what to do about f_flags, others? --bnoble */
+	sbp->f_flags = 0;
+
+	/* XXX: arbitrary block and I/O sizes */
+	sbp->f_bsize = 8192;
+	sbp->f_iosize = 8192;
+
+	/* AFS uses 0x895440 for this; Coda reports 0x8AB75D everywhere. */
+#define NB_SFS_SIZ 0x8AB75D
+	sbp->f_blocks = NB_SFS_SIZ;
+	sbp->f_bfree = NB_SFS_SIZ;
+	sbp->f_bavail = NB_SFS_SIZ;
+	sbp->f_files = NB_SFS_SIZ;
+	sbp->f_ffree = NB_SFS_SIZ;
+/*	MARK_INT_SAT(CODA_STATFS_STATS); */
+	return (0);
+    }
+
+/*  MARK_INT_FAIL(CODA_STATFS_STATS);*/
+    return (EINVAL);
+}
+
+/*
+ * Flush any pending I/O.
+ *
+ * There is nothing to flush at this level: the call is only counted in
+ * the statistics (as entered and as satisfied internally) and always
+ * returns 0.  waitfor and td are not used.
+ */
+int
+coda_sync(struct mount *vfsp, int waitfor, struct thread *td)
+{
+    ENTRY;
+
+    MARK_ENTRY(CODA_SYNC_STATS);
+    MARK_INT_SAT(CODA_SYNC_STATS);
+
+    return (0);
+}
+
+/* 
+ * fhtovp is now what vget used to be in 4.3-derived systems.  For
+ * some silly reason, vget is now keyed by a 32 bit ino_t, rather than
+ * a type-specific fid.  
+ *
+ * fhp is treated as a struct cfid.  For the control object's fid the
+ * global control vnode is returned with an extra reference and Venus is
+ * not contacted.  Otherwise venus_fhtovp() translates the fid; on
+ * success *vpp is the vnode of the cnode made for the returned fid, on
+ * failure *vpp is set to a null pointer and the error is returned.
+ * nam, exflagsp and creadanonp are not used.
+ */
+int
+coda_fhtovp(vfsp, fhp, nam, vpp, exflagsp, creadanonp)
+    register struct mount *vfsp;    
+    struct fid *fhp;
+    struct mbuf *nam;
+    struct vnode **vpp;
+    int *exflagsp;
+    struct ucred **creadanonp;
+{
+    struct cfid *cfid = (struct cfid *)fhp;
+    struct cnode *cp = 0;
+    int error;
+    struct thread *td = curthread; /* XXX -mach */
+    struct proc *p = td->td_proc;
+    CodaFid VFid;
+    int vtype;
+
+    ENTRY;
+    
+    MARK_ENTRY(CODA_VGET_STATS);
+    /* Check for vget of control object. */
+    if (IS_CTL_FID(&cfid->cfid_fid)) {
+	*vpp = coda_ctlvp;
+	vref(coda_ctlvp);
+	MARK_INT_SAT(CODA_VGET_STATS);
+	return(0);
+    }
+    
+    error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, td->td_ucred, p, &VFid, &vtype);
+    
+    if (error) {
+	/*
+	 * The CODADEBUG() use below has no trailing semicolon; the
+	 * assignment after it is a separate statement despite its
+	 * indentation (assuming CODADEBUG expands to a complete
+	 * statement -- TODO confirm against its definition).
+	 */
+	CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",error));)
+	    *vpp = (struct vnode *)0;
+    } else {
+	CODADEBUG(CODA_VGET, 
+		 myprintf(("vget: %s type %d result %d\n",
+			coda_f2s(&VFid), vtype, error)); )	    
+	cp = make_coda_node(&VFid, vfsp, vtype);
+	*vpp = CTOV(cp);
+    }
+    return(error);
+}
+
+/*
+ * Some vnodes may be left orphaned when Venus dies.  Certain operations
+ * on them should still be allowed to go through, but without
+ * propagating the orphan-ness, so this function obtains a fresh vnode
+ * for the same fid from the current run of Venus.
+ *
+ * On entry *vpp is the old vnode; the replacement is delivered through
+ * vpp by coda_fhtovp(), whose result is returned.  ENODEV is returned
+ * when the mount info no longer has a vfs attached (Venus is dead).
+ */
+ 
+int
+getNewVnode(struct vnode **vpp)
+{
+    struct coda_mntinfo *mi = vftomi((*vpp)->v_mount);
+    struct cfid handle;
+    
+    ENTRY;
+
+    /* We're guessing that if set, the 1st element on the list is a
+     * valid vnode to use. If not, return ENODEV as venus is dead.
+     */
+    if (mi->mi_vfsp == NULL)
+	return ENODEV;
+
+    /* Build a file handle around the old vnode's fid (structure copy). */
+    handle.cfid_len = (short)sizeof(CodaFid);
+    handle.cfid_fid = VTOC(*vpp)->c_fid;
+    /* XXX ? */
+
+    return coda_fhtovp(mi->mi_vfsp, (struct fid*)&handle, NULL, vpp,
+		      NULL, NULL);
+}
+
+#include <ufs/ufs/extattr.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+/* get the mount structure corresponding to a given device.  Only UFS
+ * mounts are considered. Return NULL if no device is found.
+ *
+ * NOTE(review): the mount list is walked without taking any lock here,
+ * as before; confirm that callers serialize against mount/unmount.
+ */ 
+struct mount *devtomp(dev)
+    struct cdev *dev;
+{
+    struct mount *mp;
+   
+    TAILQ_FOREACH(mp, &mountlist, mnt_list) {
+	/*
+	 * mnt_data is only a struct ufsmount for UFS mounts.  Skip every
+	 * other file system (and mounts without private data) instead of
+	 * reinterpreting their private data through VFSTOUFS().
+	 */
+	if (strcmp(mp->mnt_stat.f_fstypename, "ufs") != 0 ||
+	    mp->mnt_data == NULL)
+	    continue;
+
+	if (((VFSTOUFS(mp))->um_dev == dev)) {
+	    /* mount corresponds to UFS and the device matches one we want */
+	    return(mp); 
+	}
+    }
+    /* mount structure wasn't found */ 
+    return(NULL); 
+}
+
+/*
+ * VFS operations vector for Coda.  Only mount, root, statfs, sync and
+ * unmount are set here; coda_fhtovp() is not installed in this table.
+ */
+struct vfsops coda_vfsops = {
+    .vfs_mount =		coda_mount,
+    .vfs_root = 		coda_root,
+    .vfs_statfs =		coda_nb_statfs,
+    .vfs_sync = 		coda_sync,
+    .vfs_unmount =		coda_unmount,
+};
+
+/* Register the file system under the name "coda", flagged as a network fs. */
+VFS_SET(coda_vfsops, coda, VFCF_NETWORK);
--- /dev/null
+++ sys/fs/coda/coda_psdev.c
@@ -0,0 +1,692 @@
+/*-
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ */
+/*-
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  */
+
+/* 
+ * These routines define the pseudo device for communication between
+ * Coda's Venus and Minicache in Mach 2.6. They used to be in cfs_subr.c, 
+ * but I moved them to make it easier to port the Minicache without 
+ * porting coda. -- DCS 10/12/94
+ */
+
+/* These routines are the device entry points for Venus. */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/coda/coda_psdev.c,v 1.39.4.1 2008/01/23 12:09:43 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/file.h>		/* must come after sys/malloc.h */
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+
+#include <fs/coda/coda.h>
+#include <fs/coda/cnode.h>
+#include <fs/coda/coda_namecache.h>
+#include <fs/coda/coda_io.h>
+#include <fs/coda/coda_psdev.h>
+
+#define CTL_C
+
+#ifdef CTL_C
+#include <sys/signalvar.h>
+#endif
+
+int coda_psdev_print_entry = 0;
+static
+int outstanding_upcalls = 0;
+int coda_call_sleep = PZERO - 1;
+#ifdef	CTL_C
+int coda_pcatch = PCATCH;
+#else
+#endif
+
+#define ENTRY if(coda_psdev_print_entry) myprintf(("Entered %s\n",__func__))
+
+void vcodaattach(int n);
+
+struct vmsg {
+    struct queue vm_chain;
+    caddr_t	 vm_data;
+    u_short	 vm_flags;
+    u_short      vm_inSize;	/* Size is at most 5000 bytes */
+    u_short	 vm_outSize;
+    u_short	 vm_opcode; 	/* copied from data to save ptr lookup */
+    int		 vm_unique;
+    caddr_t	 vm_sleep;	/* Not used by Mach. */
+};
+
+#define	VM_READ	    1
+#define	VM_WRITE    2
+#define	VM_INTR	    4
+
+/* vcodaattach: do nothing */
+void
+vcodaattach(n)
+    int n;
+{
+}
+
+int 
+vc_nb_open(dev, flag, mode, td)    
+    struct cdev *dev;      
+    int          flag;     
+    int          mode;     
+    struct thread *td;             /* NetBSD only */
+{
+    struct vcomm *vcp;
+    struct coda_mntinfo *mnt;
+    
+    ENTRY;
+
+    if (!coda_nc_initialized)
+	coda_nc_init();
+    
+    mnt = dev2coda_mntinfo(dev);
+    KASSERT(mnt, ("Coda: tried to open uninitialized cfs device"));
+
+    vcp = &mnt->mi_vcomm;
+    if (VC_OPEN(vcp))
+	return(EBUSY);
+    
+    bzero(&(vcp->vc_selproc), sizeof (struct selinfo));
+    INIT_QUEUE(vcp->vc_requests);
+    INIT_QUEUE(vcp->vc_replys);
+    MARK_VC_OPEN(vcp);
+    
+    mnt->mi_vfsp = NULL;
+    mnt->mi_rootvp = NULL;
+
+    return(0);
+}
+
+int 
+vc_nb_close (dev, flag, mode, td)    
+    struct cdev *dev;      
+    int          flag;     
+    int          mode;     
+    struct thread *td;
+{
+    register struct vcomm *vcp;
+    register struct vmsg *vmp, *nvmp = NULL;
+    struct coda_mntinfo *mi;
+    int err;
+	
+    ENTRY;
+
+    mi = dev2coda_mntinfo(dev);
+    KASSERT(mi, ("Coda: closing unknown cfs device"));
+
+    vcp = &mi->mi_vcomm;
+    KASSERT(VC_OPEN(vcp), ("Coda: closing unopened cfs device"));
+    
+    /* prevent future operations on this vfs from succeeding by auto-
+     * unmounting any vfs mounted via this device. This frees user or
+     * sysadm from having to remember where all mount points are located.
+     * Put this before WAKEUPs to avoid queuing new messages between
+     * the WAKEUP and the unmount (which can happen if we're unlucky)
+     */
+    if (!mi->mi_rootvp) {
+	/* just a simple open/close with no mount */
+	MARK_VC_CLOSED(vcp);
+	return 0;
+    }
+
+    /* Let unmount know this is for real */
+    VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
+    coda_unmounting(mi->mi_vfsp);
+
+    outstanding_upcalls = 0;
+    /* Wakeup clients so they can return. */
+    for (vmp = (struct vmsg *)GETNEXT(vcp->vc_requests);
+	 !EOQ(vmp, vcp->vc_requests);
+	 vmp = nvmp)
+    {
+    	nvmp = (struct vmsg *)GETNEXT(vmp->vm_chain);
+	/* Free signal request messages and don't wake up because
+	   no one is waiting. */
+	if (vmp->vm_opcode == CODA_SIGNAL) {
+	    CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
+	    CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
+	    continue;
+	}
+	outstanding_upcalls++;	
+	wakeup(&vmp->vm_sleep);
+    }
+
+    for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys);
+	 !EOQ(vmp, vcp->vc_replys);
+	 vmp = (struct vmsg *)GETNEXT(vmp->vm_chain))
+    {
+	outstanding_upcalls++;	
+	wakeup(&vmp->vm_sleep);
+    }
+
+    MARK_VC_CLOSED(vcp);
+
+    if (outstanding_upcalls) {
+#ifdef	CODA_VERBOSE
+	printf("presleep: outstanding_upcalls = %d\n", outstanding_upcalls);
+    	(void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
+	printf("postsleep: outstanding_upcalls = %d\n", outstanding_upcalls);
+#else
+    	(void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
+#endif
+    }
+
+    err = dounmount(mi->mi_vfsp, flag, td);
+    if (err)
+	myprintf(("Error %d unmounting vfs in vcclose(%s)\n", 
+	           err, devtoname(dev)));
+    return 0;
+}
+
+int 
+vc_nb_read(dev, uiop, flag)   
+    struct cdev *dev;  
+    struct uio  *uiop; 
+    int          flag;
+{
+    register struct vcomm *	vcp;
+    register struct vmsg *vmp;
+    int error = 0;
+    
+    ENTRY;
+
+    vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
+    /* Get message at head of request queue. */
+    if (EMPTY(vcp->vc_requests))
+	return(0);	/* Nothing to read */
+    
+    vmp = (struct vmsg *)GETNEXT(vcp->vc_requests);
+    
+    /* Move the input args into userspace */
+    uiop->uio_rw = UIO_READ;
+    error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop);
+    if (error) {
+	myprintf(("vcread: error (%d) on uiomove\n", error));
+	error = EINVAL;
+    }
+
+#ifdef OLD_DIAGNOSTIC    
+    if (vmp->vm_chain.forw == 0 || vmp->vm_chain.back == 0)
+	panic("vc_nb_read: bad chain");
+#endif
+
+    REMQUE(vmp->vm_chain);
+    
+    /* If request was a signal, free up the message and don't
+       enqueue it in the reply queue. */
+    if (vmp->vm_opcode == CODA_SIGNAL) {
+	if (codadebug)
+	    myprintf(("vcread: signal msg (%d, %d)\n", 
+		      vmp->vm_opcode, vmp->vm_unique));
+	CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
+	CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
+	return(error);
+    }
+    
+    vmp->vm_flags |= VM_READ;
+    INSQUE(vmp->vm_chain, vcp->vc_replys);
+    
+    return(error);
+}
+
+int
+vc_nb_write(dev, uiop, flag)   
+    struct cdev *dev;  
+    struct uio  *uiop; 
+    int          flag;
+{
+    register struct vcomm *	vcp;
+    register struct vmsg *vmp;
+    struct coda_out_hdr *out;
+    u_long seq;
+    u_long opcode;
+    int buf[2];
+    int error = 0;
+
+    ENTRY;
+
+    vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
+    
+    /* Peek at the opcode, unique without transferring the data. */
+    uiop->uio_rw = UIO_WRITE;
+    error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop);
+    if (error) {
+	myprintf(("vcwrite: error (%d) on uiomove\n", error));
+	return(EINVAL);
+    }
+    
+    opcode = buf[0];
+    seq = buf[1];
+	
+    if (codadebug)
+	myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));
+    
+    if (DOWNCALL(opcode)) {
+	union outputArgs pbuf;
+	
+	/* get the rest of the data. */
+	uiop->uio_rw = UIO_WRITE;
+	error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result, sizeof(pbuf) - (sizeof(int)*2), uiop);
+	if (error) {
+	    myprintf(("vcwrite: error (%d) on uiomove (Op %ld seq %ld)\n", 
+		      error, opcode, seq));
+	    return(EINVAL);
+	    }
+	
+	return handleDownCall(opcode, &pbuf);
+    }
+    
+    /* Look for the message on the (waiting for) reply queue. */
+    for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys);
+	 !EOQ(vmp, vcp->vc_replys);
+	 vmp = (struct vmsg *)GETNEXT(vmp->vm_chain))
+    {
+	if (vmp->vm_unique == seq) break;
+    }
+    
+    if (EOQ(vmp, vcp->vc_replys)) {
+	if (codadebug)
+	    myprintf(("vcwrite: msg (%ld, %ld) not found\n", opcode, seq));
+	
+	return(ESRCH);
+	}
+    
+    /* Remove the message from the reply queue */
+    REMQUE(vmp->vm_chain);
+    
+    /* move data into response buffer. */
+    out = (struct coda_out_hdr *)vmp->vm_data;
+    /* Don't need to copy opcode and uniquifier. */
+    
+    /* get the rest of the data. */
+    if (vmp->vm_outSize < uiop->uio_resid) {
+	myprintf(("vcwrite: more data than asked for (%d < %d)\n",
+		  vmp->vm_outSize, uiop->uio_resid));
+	wakeup(&vmp->vm_sleep); 	/* Notify caller of the error. */
+	return(EINVAL);
+    } 
+    
+    buf[0] = uiop->uio_resid; 	/* Save this value. */
+    uiop->uio_rw = UIO_WRITE;
+    error = uiomove((caddr_t) &out->result, vmp->vm_outSize - (sizeof(int) * 2), uiop);
+    if (error) {
+	myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n", 
+		  error, opcode, seq));
+	return(EINVAL);
+    }
+    
+    /* I don't think these are used, but just in case. */
+    /* XXX - aren't these two already correct? -bnoble */
+    out->opcode = opcode;
+    out->unique = seq;
+    vmp->vm_outSize	= buf[0];	/* Amount of data transferred? */
+    vmp->vm_flags |= VM_WRITE;
+
+    error = 0;
+    if (opcode == CODA_OPEN_BY_FD) {
+	struct coda_open_by_fd_out *tmp = (struct coda_open_by_fd_out *)out;
+	struct file *fp;
+	struct vnode *vp = NULL;
+
+	if (tmp->oh.result == 0) {
+	    error = getvnode(uiop->uio_td->td_proc->p_fd, tmp->fd, &fp);
+	    if (!error) {
+		mtx_lock(&Giant);
+		vp = fp->f_vnode;
+		VREF(vp);
+		fdrop(fp, uiop->uio_td);
+		mtx_unlock(&Giant);
+	    }
+	}
+	tmp->vp = vp;
+    }
+
+    wakeup(&vmp->vm_sleep);
+    
+    return(error);
+}
+
+int
+vc_nb_ioctl(dev, cmd, addr, flag, td) 
+    struct cdev *dev;       
+    u_long        cmd;       
+    caddr_t       addr;      
+    int           flag;      
+    struct thread *td;
+{
+    ENTRY;
+
+    switch(cmd) {
+    case CODARESIZE: {
+	struct coda_resize *data = (struct coda_resize *)addr;
+	return(coda_nc_resize(data->hashsize, data->heapsize, IS_DOWNCALL));
+	break;
+    }
+    case CODASTATS:
+	if (coda_nc_use) {
+	    coda_nc_gather_stats();
+	    return(0);
+	} else {
+	    return(ENODEV);
+	}
+	break;
+    case CODAPRINT:
+	if (coda_nc_use) {
+	    print_coda_nc();
+	    return(0);
+	} else {
+	    return(ENODEV);
+	}
+	break;
+    case CIOC_KERNEL_VERSION:
+	switch (*(u_int *)addr) {
+	case 0:
+		*(u_int *)addr = coda_kernel_version;
+		return 0;
+		break;
+	case 1:
+	case 2:
+		if (coda_kernel_version != *(u_int *)addr)
+		    return ENOENT;
+		else
+		    return 0;
+	default:
+		return ENOENT;
+	}
+    	break;
+    default :
+	return(EINVAL);
+	break;
+    }
+}
+
+int
+vc_nb_poll(dev, events, td)         
+    struct cdev *dev;    
+    int           events;   
+    struct thread *td;
+{
+    register struct vcomm *vcp;
+    int event_msk = 0;
+
+    ENTRY;
+    
+    vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
+    
+    event_msk = events & (POLLIN|POLLRDNORM);
+    if (!event_msk)
+	return(0);
+    
+    if (!EMPTY(vcp->vc_requests))
+	return(events & (POLLIN|POLLRDNORM));
+
+    selrecord(td, &(vcp->vc_selproc));
+    
+    return(0);
+}
+
+/*
+ * Statistics
+ */
+struct coda_clstat coda_clstat;
+
+/* 
+ * Key question: whether to sleep interruptibly or uninterruptibly when
+ * waiting for Venus.  The former seems better (cause you can ^C a
+ * job), but then GNU-EMACS completion breaks. Use tsleep with no
+ * timeout, and no longjmp happens. But, when sleeping
+ * "uninterruptibly", we don't get told if it returns abnormally
+ * (e.g. kill -9).  
+ */
+
+int
+coda_call(mntinfo, inSize, outSize, buffer) 
+     struct coda_mntinfo *mntinfo; int inSize; int *outSize; caddr_t buffer;
+{
+	struct vcomm *vcp;
+	struct vmsg *vmp;
+	int error;
+#ifdef	CTL_C
+	struct thread *td = curthread;
+	struct proc *p = td->td_proc;
+	sigset_t psig_omask;
+	sigset_t tempset;
+	int i;
+#endif
+	if (mntinfo == NULL) {
+	    /* Unlikely, but could be a race condition with a dying warden */
+	    return ENODEV;
+	}
+
+	vcp = &(mntinfo->mi_vcomm);
+	
+	coda_clstat.ncalls++;
+	coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;
+
+	if (!VC_OPEN(vcp))
+	    return(ENODEV);
+
+	CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg));
+	/* Format the request message. */
+	vmp->vm_data = buffer;
+	vmp->vm_flags = 0;
+	vmp->vm_inSize = inSize;
+	vmp->vm_outSize 
+	    = *outSize ? *outSize : inSize; /* |buffer| >= inSize */
+	vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
+	vmp->vm_unique = ++vcp->vc_seq;
+	if (codadebug)
+	    myprintf(("Doing a call for %d.%d\n", 
+		      vmp->vm_opcode, vmp->vm_unique));
+	
+	/* Fill in the common input args. */
+	((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;
+
+	/* Append msg to request queue and poke Venus. */
+	INSQUE(vmp->vm_chain, vcp->vc_requests);
+	selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
+
+	/* We can be interrupted while we wait for Venus to process
+	 * our request.  If the interrupt occurs before Venus has read
+	 * the request, we dequeue and return. If it occurs after the
+	 * read but before the reply, we dequeue, send a signal
+	 * message, and return. If it occurs after the reply we ignore
+	 * it. In no case do we want to restart the syscall.  If it
+	 * was interrupted by a venus shutdown (vcclose), return
+	 * ENODEV.  */
+
+	/* Ignore return, We have to check anyway */
+#ifdef	CTL_C
+	/* This is work in progress.  Setting coda_pcatch lets tsleep reawaken
+	   on a ^c or ^z.  The problem is that emacs sets certain interrupts
+	   as SA_RESTART.  This means that we should exit sleep, handle the
+	   "signal" and then go to sleep again.  Mostly this is done by letting
+	   the syscall complete and be restarted.  We are not idempotent and 
+	   can not do this.  A better solution is necessary.
+	 */
+	i = 0;
+	PROC_LOCK(p);
+	psig_omask = td->td_sigmask;
+	do {
+		error = msleep(&vmp->vm_sleep, &p->p_mtx,
+			       (coda_call_sleep|coda_pcatch), "coda_call",
+			       hz*2);
+		if (error == 0)
+			break;
+		else if (error == EWOULDBLOCK) {
+#ifdef	CODA_VERBOSE
+			printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
+#endif
+		}
+		else {
+			SIGEMPTYSET(tempset);
+			SIGADDSET(tempset, SIGIO);
+			if (SIGSETEQ(td->td_siglist, tempset)) {
+				SIGADDSET(td->td_sigmask, SIGIO);
+#ifdef	CODA_VERBOSE
+				printf("coda_call: tsleep returns %d SIGIO, cnt %d\n",
+				       error, i);
+#endif
+			} else {
+				SIGDELSET(tempset, SIGIO);
+				SIGADDSET(tempset, SIGALRM);
+				if (SIGSETEQ(td->td_siglist, tempset)) {
+					SIGADDSET(td->td_sigmask, SIGALRM);
+#ifdef	CODA_VERBOSE
+					printf("coda_call: tsleep returns %d SIGALRM, cnt %d\n",
+					       error, i);
+#endif
+				}
+				else {
+#ifdef	CODA_VERBOSE
+					printf("coda_call: tsleep returns %d, cnt %d\n",
+					       error, i);
+#endif
+
+#ifdef notyet
+					tempset = td->td_siglist;
+					SIGSETNAND(tempset, td->td_sigmask);
+					printf("coda_call: siglist = %p, sigmask = %p, mask %p\n",
+					       td->td_siglist, td->td_sigmask,
+					       tempset);
+					break;
+					SIGSETOR(td->td_sigmask, td->td_siglist);
+					tempset = td->td_siglist;
+					SIGSETNAND(tempset, td->td_sigmask);
+					printf("coda_call: new mask, siglist = %p, sigmask = %p, mask %p\n",
+					       td->td_siglist, td->td_sigmask,
+					       tempset);
+#endif
+				}
+			}
+		}
+	} while (error && i++ < 128 && VC_OPEN(vcp));
+	td->td_sigmask = psig_omask;
+	signotify(td);
+	PROC_UNLOCK(p);
+#else
+	(void) tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);
+#endif
+	if (VC_OPEN(vcp)) {	/* Venus is still alive */
+ 	/* Op went through, interrupt or not... */
+	    if (vmp->vm_flags & VM_WRITE) {
+		error = 0;
+		*outSize = vmp->vm_outSize;
+	    }
+
+	    else if (!(vmp->vm_flags & VM_READ)) { 
+		/* Interrupted before venus read it. */
+#ifdef	CODA_VERBOSE
+		if (1)
+#else
+		if (codadebug)
+#endif
+		    myprintf(("interrupted before read: op = %d.%d, flags = %x\n",
+			   vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+		REMQUE(vmp->vm_chain);
+		error = EINTR;
+	    }
+	    
+	    else { 	
+		/* (!(vmp->vm_flags & VM_WRITE)) means interrupted after
+                   upcall started */
+		/* Interrupted after start of upcall, send venus a signal */
+		struct coda_in_hdr *dog;
+		struct vmsg *svmp;
+		
+#ifdef	CODA_VERBOSE
+		if (1)
+#else
+		if (codadebug)
+#endif
+		    myprintf(("Sending Venus a signal: op = %d.%d, flags = %x\n",
+			   vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+		
+		REMQUE(vmp->vm_chain);
+		error = EINTR;
+		
+		CODA_ALLOC(svmp, struct vmsg *, sizeof (struct vmsg));
+
+		CODA_ALLOC((svmp->vm_data), char *, sizeof (struct coda_in_hdr));
+		dog = (struct coda_in_hdr *)svmp->vm_data;
+		
+		svmp->vm_flags = 0;
+		dog->opcode = svmp->vm_opcode = CODA_SIGNAL;
+		dog->unique = svmp->vm_unique = vmp->vm_unique;
+		svmp->vm_inSize = sizeof (struct coda_in_hdr);
+/*??? rvb */	svmp->vm_outSize = sizeof (struct coda_in_hdr);
+		
+		if (codadebug)
+		    myprintf(("coda_call: enqueing signal msg (%d, %d)\n",
+			   svmp->vm_opcode, svmp->vm_unique));
+		
+		/* insert at head of queue! */
+		INSQUE(svmp->vm_chain, vcp->vc_requests);
+		selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
+	    }
+	}
+
+	else {	/* If venus died (!VC_OPEN(vcp)) */
+	    if (codadebug)
+		myprintf(("vcclose woke op %d.%d flags %d\n",
+		       vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+	    
+		error = ENODEV;
+	}
+
+	CODA_FREE(vmp, sizeof(struct vmsg));
+
+	if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))
+		wakeup(&outstanding_upcalls);
+
+	if (!error)
+		error = ((struct coda_out_hdr *)buffer)->result;
+	return(error);
+}
--- /dev/null
+++ sys/fs/coda/TODO
@@ -0,0 +1,19 @@
+OOPS:
+	FreeBSD does not fsync!!!
+
+Near term:
+	Fix bug in executing/mapping new files.
+	cfs_mount bug: interaction with cfs_inactive no cfs_unsave.
+	vref/vn_lock == vget except no VXWANT which may be on.
+	Review locks: vn_lock/VOP_UNLOCK/lockmgr ...
+
+Medium term:
+	Add missing VFS methods.
+	Do performance profile.
+	Tune hash algorithm used in cfs_namecache.
+	Tune hash algorithm used in cfs_subr.
+
+Eventually:
+	Use standard queue macros.
+
+$FreeBSD: src/sys/fs/coda/TODO,v 1.3 2007/07/12 20:40:37 rwatson Exp $
--- /dev/null
+++ sys/fs/coda/coda_pioctl.h
@@ -0,0 +1,70 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_pioctl.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_pioctl.h,v 1.9 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+/*-
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * ITC Remote filesystem - vice ioctl interface module
+ */
+
+/*
+ *  TODO:  Find /usr/local/include/viceioctl.h.
+ */
+
+#ifndef	_SYS_PIOCTL_H_
+#define _SYS_PIOCTL_H_
+
+/* The 2K limits above are a consequence of the size of the kernel buffer
+   used to buffer requests from the user to venus--2*MAXPATHLEN.
+   The buffer pointers may be null, or the counts may be 0 if there
+   are no input or output parameters
+ */
+
+#define _VICEIOCTL(id)  ((unsigned int ) _IOW('V', id, struct ViceIoctl))
+
+/* Use this macro to define up to 256 vice ioctl's.  These ioctl's
+   all potentially have in/out parameters--this depends upon the
+   values in the ViceIoctl structure.  This structure is itself passed
+   into the kernel by the normal ioctl parameter passing mechanism.
+ */
+
+#define _VALIDVICEIOCTL(com) (com >= _VICEIOCTL(0) && com <= _VICEIOCTL(255))
+
+#endif
--- /dev/null
+++ sys/fs/coda/coda_vfsops.h
@@ -0,0 +1,62 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/cfs/coda_vfsops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_vfsops.h,v 1.12 2007/07/12 20:40:38 rwatson Exp $
+ * 
+ */
+
+/*
+ * cfid structure:
+ * This overlays the fid structure (see vfs.h)
+ * Only used below and will probably go away.
+ */
+
+struct cfid {
+    u_short	cfid_len;
+    u_short     padding;
+    CodaFid	cfid_fid;
+};
+
+struct mbuf;
+struct mount;
+
+int coda_vfsopstats_init(void);
+int coda_fhtovp(struct mount *, struct fid *, struct mbuf *, struct vnode **,
+                      int *, struct ucred **);
+
+vfs_mount_t	coda_mount;
+vfs_unmount_t	coda_unmount;
+vfs_root_t	coda_root;
+vfs_quotactl_t	coda_quotactl;
+vfs_statfs_t	coda_nb_statfs;
+vfs_sync_t	coda_sync;
+vfs_vget_t	coda_vget;
+vfs_init_t	coda_init;
+
+int getNewVnode(struct vnode **vpp);
--- /dev/null
+++ sys/fs/coda/coda_kernel.h
@@ -0,0 +1,66 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_kernel.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_kernel.h,v 1.6 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+/* Macros to manipulate the queue */
+#ifndef INIT_QUEUE
+struct queue {
+    struct queue *forw, *back;
+};
+
+#define INIT_QUEUE(head)                     \
+do {                                         \
+    (head).forw = (struct queue *)&(head);   \
+    (head).back = (struct queue *)&(head);   \
+} while (0)
+
+#define GETNEXT(head) (head).forw
+
+#define EMPTY(head) ((head).forw == &(head))
+
+#define EOQ(el, head) ((struct queue *)(el) == (struct queue *)&(head))
+		   
+#define INSQUE(el, head)                             \
+do {                                                 \
+	(el).forw = ((head).back)->forw;             \
+	(el).back = (head).back;                     \
+	((head).back)->forw = (struct queue *)&(el); \
+	(head).back = (struct queue *)&(el);         \
+} while (0)
+
+#define REMQUE(el)                         \
+do {                                       \
+	((el).forw)->back = (el).back;     \
+	(el).back->forw = (el).forw;       \
+}  while (0)
+
+#endif
--- /dev/null
+++ sys/fs/coda/coda_fbsd.c
@@ -0,0 +1,141 @@
+/*-
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_fbsd.cr,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/coda/coda_fbsd.c,v 1.46 2007/07/12 21:04:57 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/ucred.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vnode_pager.h>
+
+#include <fs/coda/coda.h>
+#include <fs/coda/cnode.h>
+#include <fs/coda/coda_vnops.h>
+#include <fs/coda/coda_psdev.h>
+
+static struct cdevsw codadevsw = {
+	.d_version =	D_VERSION,
+	.d_flags =	D_NEEDGIANT,
+	.d_open =	vc_nb_open,
+	.d_close =	vc_nb_close,
+	.d_read =	vc_nb_read,
+	.d_write =	vc_nb_write,
+	.d_ioctl =	vc_nb_ioctl,
+	.d_poll =	vc_nb_poll,
+	.d_name =	"Coda",
+};
+
+static eventhandler_tag clonetag;
+
+static LIST_HEAD(, coda_mntinfo) coda_mnttbl;
+
+int     vcdebug = 1;
+#define VCDEBUG if (vcdebug) printf
+
+/* for DEVFS, using bpf & tun drivers as examples*/
+static void coda_fbsd_clone(void *arg, struct ucred *cred, char *name,
+    int namelen, struct cdev **dev);
+
+static int
+codadev_modevent(module_t mod, int type, void *data)
+{
+	struct coda_mntinfo	*mnt;
+
+	switch (type) {
+	case MOD_LOAD:
+		LIST_INIT(&coda_mnttbl);
+		clonetag = EVENTHANDLER_REGISTER(dev_clone, coda_fbsd_clone,
+		    0, 1000);
+		break;
+	case MOD_UNLOAD:
+		EVENTHANDLER_DEREGISTER(dev_clone, clonetag);
+		while ((mnt = LIST_FIRST(&coda_mnttbl)) != NULL) {
+			LIST_REMOVE(mnt, mi_list);
+			destroy_dev(mnt->dev);
+			free(mnt, M_CODA);
+		}
+		break;
+
+	default:
+		return (EOPNOTSUPP);
+	}
+	return 0;
+}
+static moduledata_t codadev_mod = {
+	"codadev",
+	codadev_modevent,
+	NULL
+};
+DECLARE_MODULE(codadev, codadev_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
+
+static void coda_fbsd_clone(arg, cred, name, namelen, dev)
+    void *arg;
+    struct ucred *cred;
+    char *name;
+    int namelen;
+    struct cdev **dev;
+{
+    int u;
+    struct coda_mntinfo *mnt;
+
+    if (*dev != NULL)
+	return;
+    if (dev_stdclone(name,NULL,"cfs",&u) != 1)
+	return;
+
+    *dev = make_dev(&codadevsw,unit2minor(u),UID_ROOT,GID_WHEEL,0600,"cfs%d",u);
+    dev_ref(*dev);
+    mnt = malloc(sizeof(struct coda_mntinfo), M_CODA, M_WAITOK|M_ZERO);
+    LIST_INSERT_HEAD(&coda_mnttbl, mnt, mi_list);
+    mnt->dev = *dev;
+}
+
+struct coda_mntinfo *
+dev2coda_mntinfo(struct cdev *dev)
+{
+	struct coda_mntinfo	*mnt;
+
+	LIST_FOREACH(mnt, &coda_mnttbl, mi_list) {
+		if (mnt->dev == dev)
+			return mnt;
+	}
+
+	return NULL;
+}
--- /dev/null
+++ sys/fs/coda/coda_io.h
@@ -0,0 +1,59 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_io.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_io.h,v 1.9 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+/*-
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+#ifndef _CODAIO_H_
+#define _CODAIO_H_
+
+/* Define ioctl commands for vcioctl, /dev/cfs */
+
+#define CODARESIZE    _IOW('c', 1, struct coda_resize ) /* Resize CODA NameCache */
+#define CODASTATS      _IO('c', 2)                      /* Collect stats */
+#define CODAPRINT      _IO('c', 3)                      /* Print Cache */
+/* NOTE(review): the comment on CODATEST repeats CODAPRINT's; confirm what it does. */
+#define CODATEST       _IO('c', 4)                      /* Print Cache */
+
+/* Argument of CODARESIZE; presumably the new name-cache hash and heap sizes. */
+struct coda_resize { int hashsize, heapsize; };
+
+#endif /* !_CODAIO_H_ */
--- /dev/null
+++ sys/fs/coda/coda_venus.c
@@ -0,0 +1,671 @@
+/*-
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/cfs/coda_venus.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/coda/coda_venus.c,v 1.27 2007/07/12 21:04:57 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sx.h>
+
+#include <fs/coda/coda.h>
+#include <fs/coda/cnode.h>
+#include <fs/coda/coda_venus.h>
+#include <fs/coda/coda_pioctl.h>
+
+/*
+ * DECL_NO_IN / DECL / DECL_NO_OUT declare the locals shared by every
+ * venus_*() wrapper in this file:
+ *
+ *   inp         request pointer: struct <name>_in, or the bare
+ *               struct coda_in_hdr for DECL_NO_IN;
+ *   outp        reply pointer: struct <name>_out, or the bare
+ *               struct coda_out_hdr for DECL_NO_OUT;
+ *   <name>_size allocation size; starts as the request size and may be
+ *               enlarged by the wrapper before ALLOC*() is used;
+ *   Isize       request size handed to coda_call();
+ *   Osize       reply size handed (by address) to coda_call();
+ *   error       result code.
+ *
+ * Each expansion is a run of declarations whose last one has no semicolon;
+ * the use site supplies it.
+ */
+#define DECL_NO_IN(name) 				\
+    struct coda_in_hdr *inp;				\
+    struct name ## _out *outp;				\
+    int name ## _size = sizeof (struct coda_in_hdr);	\
+    int Isize = sizeof (struct coda_in_hdr);		\
+    int Osize = sizeof (struct name ## _out);		\
+    int error
+
+#define DECL(name)					\
+    struct name ## _in *inp;				\
+    struct name ## _out *outp;				\
+    int name ## _size = sizeof (struct name ## _in);	\
+    int Isize = sizeof (struct name ## _in);		\
+    int Osize = sizeof (struct name ## _out);		\
+    int error
+
+#define DECL_NO_OUT(name)				\
+    struct name ## _in *inp;				\
+    struct coda_out_hdr *outp;				\
+    int name ## _size = sizeof (struct name ## _in);	\
+    int Isize = sizeof (struct name ## _in);		\
+    int Osize = sizeof (struct coda_out_hdr);		\
+    int error
+
+/*
+ * ALLOC_NO_IN / ALLOC / ALLOC_NO_OUT allocate the message buffer for a
+ * wrapper whose locals were declared with the matching DECL*() macro.
+ *
+ * <name>_size is first raised to Osize if the reply is the larger of the
+ * two, then one buffer of that size is obtained with CODA_ALLOC.  Both inp
+ * and outp point at that same buffer, so the reply overlays the request.
+ *
+ * Each expansion is several statements, not one: use it only as a complete
+ * statement, never as the unbraced body of an if/else or a loop.
+ */
+#define ALLOC_NO_IN(name)				\
+    if (Osize > name ## _size)				\
+    	name ## _size = Osize;				\
+    CODA_ALLOC(inp, struct coda_in_hdr *, name ## _size);\
+    outp = (struct name ## _out *) inp
+
+#define ALLOC(name)					\
+    if (Osize > name ## _size)				\
+    	name ## _size = Osize;				\
+    CODA_ALLOC(inp, struct name ## _in *, name ## _size);\
+    outp = (struct name ## _out *) inp
+
+#define ALLOC_NO_OUT(name)				\
+    if (Osize > name ## _size)				\
+    	name ## _size = Osize;				\
+    CODA_ALLOC(inp, struct name ## _in *, name ## _size);\
+    outp = (struct coda_out_hdr *) inp
+
+/*
+ * STRCPY(struc, name, len): append a string to the request being built.
+ *
+ * Copies `len' bytes from `name' to the byte offset stored in inp->struc
+ * (relative to inp), writes a terminating 0 after them, and adds the
+ * resulting length, terminator included, to Isize.
+ *
+ * Relies on the locals `inp' and `Isize' declared by DECL*().  `len' must be
+ * a modifiable lvalue: it is incremented by one as a side effect, exactly as
+ * before.  The body is wrapped in do { } while (0) so that the macro is a
+ * single statement and stays correct under an unbraced if/else.
+ */
+#define STRCPY(struc, name, len) \
+    do { \
+	bcopy((name), (char *)inp + (int)inp->struc, (len)); \
+	((char *)inp + (int)inp->struc)[(len)++] = 0; \
+	Isize += (len); \
+    } while (0)
+
+/*
+ * INIT_IN(in, op, ident, p): fill in the common part of a request.
+ *
+ *   in    - pointer to the request header to fill in;
+ *   op    - opcode to store;
+ *   ident - caller's credentials, or NOCRED, in which case the uid (and,
+ *           under CODA_COMPAT_5, the group id) is set to -1;
+ *   p     - calling process, or NULL, in which case pid and pgid (and,
+ *           under CODA_COMPAT_5, sid) are set to -1.
+ *
+ * The CODA_COMPAT_5 variant additionally records the session leader's pid
+ * and reads the process fields while holding proctree_lock shared.
+ *
+ * Arguments are evaluated more than once, so pass expressions without side
+ * effects.  The body is wrapped in do { } while (0) so that the macro is a
+ * single statement at the use site.
+ */
+#ifdef CODA_COMPAT_5
+#define INIT_IN(in, op, ident, p) \
+	do { \
+	  (in)->opcode = (op); \
+	  sx_slock(&proctree_lock); \
+	  (in)->pid = (p) ? (p)->p_pid : -1; \
+	  (in)->pgid = (p) ? (p)->p_pgid : -1; \
+	  (in)->sid = ((p) && (p)->p_session && (p)->p_session->s_leader) ? \
+	      ((p)->p_session->s_leader->p_pid) : -1; \
+	  sx_sunlock(&proctree_lock); \
+	  if ((ident) != NOCRED) { \
+	      (in)->cred.cr_uid = (ident)->cr_uid; \
+	      (in)->cred.cr_groupid = (ident)->cr_gid; \
+	  } else { \
+	      bzero(&((in)->cred), sizeof(struct coda_cred)); \
+	      (in)->cred.cr_uid = -1; \
+	      (in)->cred.cr_groupid = -1; \
+	  } \
+	} while (0)
+#else
+#define INIT_IN(in, op, ident, p) \
+	do { \
+	  (in)->opcode = (op); \
+	  (in)->pid = (p) ? (p)->p_pid : -1; \
+	  (in)->pgid = (p) ? (p)->p_pgid : -1; \
+	  if ((ident) != NOCRED) { \
+	      (in)->uid = (ident)->cr_uid; \
+	  } else { \
+	      (in)->uid = -1; \
+	  } \
+	} while (0)
+#endif
+/*
+ * CNV_OFLAG(to, from): translate kernel open flags into Coda open flags.
+ * `to' is cleared, then C_O_READ, C_O_WRITE, C_O_TRUNC, C_O_EXCL and
+ * C_O_CREAT are set for FREAD, FWRITE, O_TRUNC, O_EXCL and O_CREAT
+ * respectively; every other bit of `from' is dropped.
+ * `from' is expanded unparenthesized and several times: pass a plain
+ * variable.
+ */
+#define	CNV_OFLAG(to, from) 				\
+    do { 						\
+	  to = 0;					\
+	  if (from & FREAD)   to |= C_O_READ; 		\
+	  if (from & FWRITE)  to |= C_O_WRITE; 		\
+	  if (from & O_TRUNC) to |= C_O_TRUNC; 		\
+	  if (from & O_EXCL)  to |= C_O_EXCL; 		\
+	  if (from & O_CREAT) to |= C_O_CREAT;		\
+    } while (0)
+
+/*
+ * CNV_VV2V_ATTR(top, fromp): copy attributes field by field from *fromp to
+ * *top.  In this file it is used to turn the attributes in a reply
+ * (outp->attr) into the caller's struct vattr.  The destination-only fields
+ * va_fsid, va_vaflags and va_spare are set to VNOVAL.
+ */
+#define CNV_VV2V_ATTR(top, fromp) \
+	do { \
+		(top)->va_type = (fromp)->va_type; \
+		(top)->va_mode = (fromp)->va_mode; \
+		(top)->va_nlink = (fromp)->va_nlink; \
+		(top)->va_uid = (fromp)->va_uid; \
+		(top)->va_gid = (fromp)->va_gid; \
+		(top)->va_fsid = VNOVAL; \
+		(top)->va_fileid = (fromp)->va_fileid; \
+		(top)->va_size = (fromp)->va_size; \
+		(top)->va_blocksize = (fromp)->va_blocksize; \
+		(top)->va_atime = (fromp)->va_atime; \
+		(top)->va_mtime = (fromp)->va_mtime; \
+		(top)->va_ctime = (fromp)->va_ctime; \
+		(top)->va_gen = (fromp)->va_gen; \
+		(top)->va_flags = (fromp)->va_flags; \
+		(top)->va_rdev = (fromp)->va_rdev; \
+		(top)->va_bytes = (fromp)->va_bytes; \
+		(top)->va_filerev = (fromp)->va_filerev; \
+		(top)->va_vaflags = VNOVAL; \
+		(top)->va_spare = VNOVAL; \
+	} while (0)
+
+/*
+ * CNV_V2VV_ATTR(top, fromp): the reverse of CNV_VV2V_ATTR.  In this file it
+ * copies the caller's struct vattr into the attributes of a request
+ * (inp->attr).  va_fsid, va_vaflags and va_spare are not transferred.
+ */
+#define CNV_V2VV_ATTR(top, fromp) \
+	do { \
+		(top)->va_type = (fromp)->va_type; \
+		(top)->va_mode = (fromp)->va_mode; \
+		(top)->va_nlink = (fromp)->va_nlink; \
+		(top)->va_uid = (fromp)->va_uid; \
+		(top)->va_gid = (fromp)->va_gid; \
+		(top)->va_fileid = (fromp)->va_fileid; \
+		(top)->va_size = (fromp)->va_size; \
+		(top)->va_blocksize = (fromp)->va_blocksize; \
+		(top)->va_atime = (fromp)->va_atime; \
+		(top)->va_mtime = (fromp)->va_mtime; \
+		(top)->va_ctime = (fromp)->va_ctime; \
+		(top)->va_gen = (fromp)->va_gen; \
+		(top)->va_flags = (fromp)->va_flags; \
+		(top)->va_rdev = (fromp)->va_rdev; \
+		(top)->va_bytes = (fromp)->va_bytes; \
+		(top)->va_filerev = (fromp)->va_filerev; \
+	} while (0)
+
+
+/* CODA_KERNEL_VERSION made available as a variable. */
+int coda_kernel_version = CODA_KERNEL_VERSION;
+
+/*
+ * CODA_ROOT upcall.  The request consists of the common header only.  On
+ * success the Fid found in the reply is copied to *VFid.
+ * Returns the result of coda_call().
+ */
+int
+venus_root(void *mdp,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid)
+{
+    DECL_NO_IN(coda_root);		/* sets Isize & Osize */
+    ALLOC_NO_IN(coda_root);		/* sets inp & outp */
+
+    /* Fill in the request header. */
+    INIT_IN(inp, CODA_ROOT, cred, p);  
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error)
+	*VFid = outp->Fid;
+
+    CODA_FREE(inp, coda_root_size);
+    return error;
+}
+
+/*
+ * CODA_OPEN_BY_FD upcall for `fid'.  `flag' holds kernel open flags and is
+ * translated with CNV_OFLAG before being sent.  *vp receives the vnode
+ * pointer from the reply on success and NULL on failure.
+ * Returns the result of coda_call().
+ */
+int
+venus_open(void *mdp, CodaFid *fid, int flag,
+	struct ucred *cred, struct proc *p,
+/*out*/	struct vnode **vp)
+{
+    int cflag;
+    DECL(coda_open_by_fd);			/* sets Isize & Osize */
+    ALLOC(coda_open_by_fd);			/* sets inp & outp */
+
+    /* send the open to venus. */
+    INIT_IN(&inp->ih, CODA_OPEN_BY_FD, cred, p);
+    inp->Fid = *fid;
+    CNV_OFLAG(cflag, flag);
+    inp->flags = cflag;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    *vp = error ? NULL : outp->vp;
+
+    CODA_FREE(inp, coda_open_by_fd_size);
+    return error;
+}
+
+/*
+ * CODA_CLOSE upcall for `fid'.  `flag' holds kernel open flags and is
+ * translated with CNV_OFLAG before being sent.  Nothing is read from the
+ * reply.  Returns the result of coda_call().
+ */
+int
+venus_close(void *mdp, CodaFid *fid, int flag,
+	struct ucred *cred, struct proc *p)
+{
+    int cflag;
+    DECL_NO_OUT(coda_close);		/* sets Isize & Osize */
+    ALLOC_NO_OUT(coda_close);		/* sets inp & outp */
+
+    /* Build the CODA_CLOSE request. */
+    INIT_IN(&inp->ih, CODA_CLOSE, cred, p);
+    inp->Fid = *fid;
+    CNV_OFLAG(cflag, flag);
+    inp->flags = cflag;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_close_size);
+    return error;
+}
+
+/*
+ * venus_read() and venus_write() are empty placeholders: there is no
+ * read/write upcall because the container file is read and written
+ * directly.
+ */
+void
+venus_read(void)
+{
+}
+
+/* Empty placeholder: the container file is written directly. */
+void
+venus_write(void)
+{
+}
+
+/*
+ * CODA_IOCTL upcall.  This is a bit sad too: the ioctl's are for the
+ * control file, not for normal files.
+ *
+ * mdp     - handle passed through to coda_call().
+ * fid     - fid stored in the request.
+ * com     - ioctl command word; its size field is rewritten, see below.
+ * flag    - stored in the request as rwflag.
+ * data    - struct PioctlData describing the user's input and output
+ *           buffers (vi.in/vi.in_size and vi.out/vi.out_size).
+ * cred, p - identity recorded in the request header.
+ *
+ * Returns 0 on success; EINVAL when the user's input does not fit in the
+ * message buffer or when the reply is longer than the user's output buffer;
+ * otherwise the error from copyin(), coda_call() or copyout().
+ */
+int
+venus_ioctl(void *mdp, CodaFid *fid,
+	int com, int flag, caddr_t data,
+	struct ucred *cred, struct proc *p)
+{
+    DECL(coda_ioctl);			/* sets Isize & Osize */
+    struct PioctlData *iap = (struct PioctlData *)data;
+    int insize = iap->vi.in_size;
+    int tmp;
+
+    coda_ioctl_size = VC_MAXMSGSIZE;
+
+    /*
+     * The user's input is copied in directly behind the fixed part of the
+     * request, so it has to fit in what is left of the message buffer.
+     * Without this check a large in_size overruns the allocation.
+     */
+    if (insize < 0 || insize > coda_ioctl_size - Isize)
+	return (EINVAL);
+
+    ALLOC(coda_ioctl);			/* sets inp & outp */
+
+    INIT_IN(&inp->ih, CODA_IOCTL, cred, p);
+    inp->Fid = *fid;
+
+    /* command was mutated by increasing its size field to reflect the  
+     * path and follow args. we need to subtract that out before sending
+     * the command to venus.
+     */
+    inp->cmd = (com & ~(IOCPARM_MASK << 16));
+    tmp = ((com >> 16) & IOCPARM_MASK) - sizeof (char *) - sizeof (int);
+    inp->cmd |= (tmp & IOCPARM_MASK) <<	16;
+
+    inp->rwflag = flag;
+    inp->len = insize;
+    /* `data' holds an offset from the start of the request, not a pointer. */
+    inp->data = (char *)(sizeof (struct coda_ioctl_in));
+
+    error = copyin(iap->vi.in, (char*)inp + (long)inp->data, insize);
+    if (error) {
+	CODA_FREE(inp, coda_ioctl_size);
+	return(error);
+    }
+
+    Osize = VC_MAXMSGSIZE;
+    error = coda_call(mdp, Isize + insize, &Osize, (char *)inp);
+
+    /* copy out the out buffer. */
+    if (!error) {
+	if (outp->len > iap->vi.out_size) {
+	    error = EINVAL;
+	} else {
+	    /*
+	     * Hand back only the bytes the reply actually carries; copying
+	     * out_size bytes would expose whatever follows them in the
+	     * kernel buffer.
+	     */
+	    error = copyout((char *)outp + (long)outp->data, 
+			    iap->vi.out, outp->len);
+	}
+    }
+
+    CODA_FREE(inp, coda_ioctl_size);
+    return error;
+}
+
+/*
+ * CODA_GETATTR upcall for `fid'.  On success the attributes in the reply
+ * are converted into *vap with CNV_VV2V_ATTR.
+ * Returns the result of coda_call().
+ */
+int
+venus_getattr(void *mdp, CodaFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	struct vattr *vap)
+{
+    DECL(coda_getattr);			/* sets Isize & Osize */
+    ALLOC(coda_getattr);			/* sets inp & outp */
+
+    /* Build the CODA_GETATTR request. */
+    INIT_IN(&inp->ih, CODA_GETATTR, cred, p);
+    inp->Fid = *fid;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {
+	CNV_VV2V_ATTR(vap, &outp->attr);
+    }
+
+    CODA_FREE(inp, coda_getattr_size);
+    return error;
+}
+
+/*
+ * CODA_SETATTR upcall for `fid'.  *vap is converted into the request's
+ * attributes with CNV_V2VV_ATTR.  Nothing is read from the reply.
+ * Returns the result of coda_call().
+ */
+int
+venus_setattr(void *mdp, CodaFid *fid, struct vattr *vap,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_setattr);		/* sets Isize & Osize */
+    ALLOC_NO_OUT(coda_setattr);		/* sets inp & outp */
+
+    /* Build the CODA_SETATTR request. */
+    INIT_IN(&inp->ih, CODA_SETATTR, cred, p);
+    inp->Fid = *fid;
+    CNV_V2VV_ATTR(&inp->attr, vap);
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_setattr_size);
+    return error;
+}
+
+/*
+ * CODA_ACCESS upcall for `fid'.  The request's flags are `mode' shifted
+ * right by six bits (see the note in the body).  Nothing is read from the
+ * reply.  Returns the result of coda_call().
+ */
+int
+venus_access(void *mdp, CodaFid *fid, int mode,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_access);		/* sets Isize & Osize */
+    ALLOC_NO_OUT(coda_access);		/* sets inp & outp */
+
+    /* Build the CODA_ACCESS request. */
+    INIT_IN(&inp->ih, CODA_ACCESS, cred, p);
+    inp->Fid = *fid;
+    /* NOTE:
+     * NetBSD and Venus internals use the "data" in the low 3 bits.
+     * Hence, the conversion.
+     */
+    inp->flags = mode>>6;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_access_size);
+    return error;
+}
+
+/*
+ * CODA_READLINK upcall for `fid'.  The buffer and the expected reply size
+ * are enlarged by CODA_MAXPATHLEN to leave room for the link text.
+ *
+ * On success *str is set to a buffer obtained with CODA_ALLOC holding
+ * outp->count bytes copied from the reply, and *len is set to that count.
+ * No terminating NUL is added here.  Presumably the caller releases *str
+ * with CODA_FREE -- confirm against the callers.
+ * On failure *str and *len are left untouched.
+ *
+ * NOTE(review): outp->count is used as given by the reply; it is not
+ * checked against CODA_MAXPATHLEN before the allocation and the copy.
+ */
+int
+venus_readlink(void *mdp, CodaFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	char **str, int *len)
+{
+    DECL(coda_readlink);			/* sets Isize & Osize */
+    coda_readlink_size += CODA_MAXPATHLEN;
+    ALLOC(coda_readlink);		/* sets inp & outp */
+
+    /* Build the CODA_READLINK request. */
+    INIT_IN(&inp->ih, CODA_READLINK, cred, p);
+    inp->Fid = *fid;
+
+    Osize += CODA_MAXPATHLEN;
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {
+	    CODA_ALLOC(*str, char *, outp->count);
+	    *len = outp->count;
+	    /* outp->data is an offset from the start of the reply. */
+	    bcopy((char *)outp + (long)outp->data, *str, *len);
+    }
+
+    CODA_FREE(inp, coda_readlink_size);
+    return error;
+}
+
+/*
+ * CODA_FSYNC upcall for `fid'.  No credentials are available here, so the
+ * header is filled in with NOCRED.  Nothing is read from the reply.
+ * Returns the result of coda_call().
+ */
+int
+venus_fsync(void *mdp, CodaFid *fid, struct proc *p)
+{
+    DECL_NO_OUT(coda_fsync);		/* sets Isize & Osize */
+    ALLOC_NO_OUT(coda_fsync);		/* sets inp & outp */
+
+    /* Build the CODA_FSYNC request. */
+    INIT_IN(&inp->ih, CODA_FSYNC, NOCRED, p);	/* XXX: should be cached mount cred */
+    inp->Fid = *fid;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_fsync_size);
+    return error;
+}
+
+/*
+ * CODA_LOOKUP upcall: look up the name `nm' (len bytes) in directory `fid'.
+ * The name is appended to the request, NUL-terminated, by STRCPY.  On
+ * success the Fid and vtype from the reply are stored in *VFid and *vtype.
+ * Returns the result of coda_call().
+ */
+int
+venus_lookup(void *mdp, CodaFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, int *vtype)
+{
+    DECL(coda_lookup);			/* sets Isize & Osize */
+    coda_lookup_size += len + 1;	/* room for the name and its NUL */
+    ALLOC(coda_lookup);			/* sets inp & outp */
+
+    /* Build the CODA_LOOKUP request. */
+    INIT_IN(&inp->ih, CODA_LOOKUP, cred, p);
+    inp->Fid = *fid;
+
+    /* NOTE:
+     * Between version 1 and version 2 we have added an extra flag field
+     * to this structure.  But because the string was at the end and because
+     * of the weird way we represent strings by having the slot point to
+     * where the string characters are in the "heap", we can just slip the
+     * flag parameter in after the string slot pointer and veni that don't
+     * know better won't see this new flag field ...
+     * Otherwise we'd need two different venus_lookup functions.
+     */
+    inp->name = Isize;
+    inp->flags = CLU_CASE_SENSITIVE;	/* doesn't really matter for BSD */
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {
+	*VFid = outp->Fid;
+	*vtype = outp->vtype;
+    }
+
+    CODA_FREE(inp, coda_lookup_size);
+    return error;
+}
+
+/*
+ * CODA_CREATE upcall: create `nm' (len bytes) in directory `fid'.
+ * `exclusive' non-zero sends C_O_EXCL; `mode' is sent as given; *va is
+ * converted into the request's attributes.  On success the new Fid is
+ * stored in *VFid and the reply's attributes are converted into *attr.
+ * Returns the result of coda_call().
+ */
+int
+venus_create(void *mdp, CodaFid *fid,
+    	const char *nm, int len, int exclusive, int mode, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, struct vattr *attr)
+{
+    DECL(coda_create);			/* sets Isize & Osize */
+    coda_create_size += len + 1;	/* room for the name and its NUL */
+    ALLOC(coda_create);			/* sets inp & outp */
+
+    /* Build the CODA_CREATE request. */
+    INIT_IN(&inp->ih, CODA_CREATE, cred, p);
+    inp->Fid = *fid;
+    inp->excl = exclusive ? C_O_EXCL : 0;
+    inp->mode = mode;
+    CNV_V2VV_ATTR(&inp->attr, va);
+
+    inp->name = Isize;
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {
+	*VFid = outp->Fid;
+	CNV_VV2V_ATTR(attr, &outp->attr);
+    }
+
+    CODA_FREE(inp, coda_create_size);
+    return error;
+}
+
+/*
+ * CODA_REMOVE upcall: remove `nm' (len bytes) from directory `fid'.
+ * Nothing is read from the reply.  Returns the result of coda_call().
+ */
+int
+venus_remove(void *mdp, CodaFid *fid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_remove);		/* sets Isize & Osize */
+    coda_remove_size += len + 1;	/* room for the name and its NUL */
+    ALLOC_NO_OUT(coda_remove);		/* sets inp & outp */
+
+    /* Build the CODA_REMOVE request. */
+    INIT_IN(&inp->ih, CODA_REMOVE, cred, p);
+    inp->Fid = *fid;
+
+    inp->name = Isize;
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_remove_size);
+    return error;
+}
+
+/*
+ * CODA_LINK upcall.  `fid' is sent as the source Fid, `tfid' as the
+ * destination Fid, and `nm' (len bytes) as the target name.  Nothing is
+ * read from the reply.  Returns the result of coda_call().
+ */
+int
+venus_link(void *mdp, CodaFid *fid, CodaFid *tfid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_link);		/* sets Isize & Osize */
+    coda_link_size += len + 1;		/* room for the name and its NUL */
+    ALLOC_NO_OUT(coda_link);		/* sets inp & outp */
+
+    /* Build the CODA_LINK request. */
+    INIT_IN(&inp->ih, CODA_LINK, cred, p);
+    inp->sourceFid = *fid;
+    inp->destFid = *tfid;
+
+    inp->tname = Isize;
+    STRCPY(tname, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_link_size);
+    return error;
+}
+
+/*
+ * CODA_RENAME upcall.  `fid'/`nm' (len bytes) are sent as the source Fid
+ * and name, `tfid'/`tnm' (tlen bytes) as the destination Fid and name.
+ * Both names are appended to the request, source first.  Nothing is read
+ * from the reply.  Returns the result of coda_call().
+ */
+int
+venus_rename(void *mdp, CodaFid *fid, CodaFid *tfid,
+        const char *nm, int len, const char *tnm, int tlen,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_rename);		/* sets Isize & Osize */
+    coda_rename_size += len + 1 + tlen + 1;	/* both names, each with NUL */
+    ALLOC_NO_OUT(coda_rename);		/* sets inp & outp */
+
+    /* Build the CODA_RENAME request. */
+    INIT_IN(&inp->ih, CODA_RENAME, cred, p);
+    inp->sourceFid = *fid;
+    inp->destFid = *tfid;
+
+    inp->srcname = Isize;
+    STRCPY(srcname, nm, len);		/* increments Isize */
+
+    inp->destname = Isize;
+    STRCPY(destname, tnm, tlen);	/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_rename_size);
+    return error;
+}
+
+/*
+ * CODA_MKDIR upcall: create directory `nm' (len bytes) in directory `fid'
+ * with the attributes in *va.  On success the new Fid is stored in *VFid
+ * and the reply's attributes are converted into *ova.
+ * Returns the result of coda_call().
+ */
+int
+venus_mkdir(void *mdp, CodaFid *fid,
+    	const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, struct vattr *ova)
+{
+    DECL(coda_mkdir);			/* sets Isize & Osize */
+    coda_mkdir_size += len + 1;		/* room for the name and its NUL */
+    ALLOC(coda_mkdir);			/* sets inp & outp */
+
+    /* Build the CODA_MKDIR request. */
+    INIT_IN(&inp->ih, CODA_MKDIR, cred, p);
+    inp->Fid = *fid;
+    CNV_V2VV_ATTR(&inp->attr, va);
+
+    inp->name = Isize;
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {
+	*VFid = outp->Fid;
+	CNV_VV2V_ATTR(ova, &outp->attr);
+    }
+
+    CODA_FREE(inp, coda_mkdir_size);
+    return error;
+}
+
+/*
+ * CODA_RMDIR upcall: remove directory `nm' (len bytes) from directory
+ * `fid'.  Nothing is read from the reply.
+ * Returns the result of coda_call().
+ */
+int
+venus_rmdir(void *mdp, CodaFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_rmdir);		/* sets Isize & Osize */
+    coda_rmdir_size += len + 1;		/* room for the name and its NUL */
+    ALLOC_NO_OUT(coda_rmdir);		/* sets inp & outp */
+
+    /* Build the CODA_RMDIR request. */
+    INIT_IN(&inp->ih, CODA_RMDIR, cred, p);
+    inp->Fid = *fid;
+
+    inp->name = Isize;
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_rmdir_size);
+    return error;
+}
+
+/*
+ * CODA_SYMLINK upcall in directory `fid'.  `lnm' (llen bytes) is sent as
+ * srcname and `nm' (len bytes) as tname, in that order; *va is converted
+ * into the request's attributes.  Nothing is read from the reply.
+ * Returns the result of coda_call().
+ */
+int
+venus_symlink(void *mdp, CodaFid *fid,
+        const char *lnm, int llen, const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_symlink);		/* sets Isize & Osize */
+    coda_symlink_size += llen + 1 + len + 1;	/* both strings, each with NUL */
+    ALLOC_NO_OUT(coda_symlink);		/* sets inp & outp */
+
+    /* Build the CODA_SYMLINK request. */
+    INIT_IN(&inp->ih, CODA_SYMLINK, cred, p);
+    inp->Fid = *fid;
+    CNV_V2VV_ATTR(&inp->attr, va);
+
+    inp->srcname = Isize;
+    STRCPY(srcname, lnm, llen);		/* increments Isize */
+
+    inp->tname = Isize;
+    STRCPY(tname, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+    CODA_FREE(inp, coda_symlink_size);
+    return error;
+}
+
+/*
+ * CODA_READDIR upcall for directory `fid'.  `count' and `offset' are sent
+ * as given.  The message buffer and the expected reply size are both
+ * VC_MAXMSGSIZE.  On success outp->size bytes of the reply are copied into
+ * `buffer' and *len is set to that size.
+ * Returns the result of coda_call().
+ *
+ * NOTE(review): outp->size is taken from the reply as is; it is not
+ * compared with `count' before the copy into `buffer'.
+ */
+int
+venus_readdir(void *mdp, CodaFid *fid,
+    	int count, int offset,
+	struct ucred *cred, struct proc *p,
+/*out*/	char *buffer, int *len)
+{
+    DECL(coda_readdir);			/* sets Isize & Osize */
+    coda_readdir_size = VC_MAXMSGSIZE;
+    ALLOC(coda_readdir);			/* sets inp & outp */
+
+    /* Build the CODA_READDIR request. */
+    INIT_IN(&inp->ih, CODA_READDIR, cred, p);
+    inp->Fid = *fid;
+    inp->count = count;
+    inp->offset = offset;
+
+    Osize = VC_MAXMSGSIZE;
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {
+	/* outp->data is an offset from the start of the reply. */
+	bcopy((char *)outp + (long)outp->data, buffer, outp->size);
+	*len = outp->size;
+    }
+
+    CODA_FREE(inp, coda_readdir_size);
+    return error;
+}
+
+/*
+ * CODA_VGET upcall for `fid'.  On success the Fid and vtype from the reply
+ * are stored in *VFid and *vtype.  Returns the result of coda_call().
+ */
+int
+venus_fhtovp(void *mdp, CodaFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	CodaFid *VFid, int *vtype)
+{
+    DECL(coda_vget);			/* sets Isize & Osize */
+    ALLOC(coda_vget);			/* sets inp & outp */
+
+    /* Build the CODA_VGET request. */
+    INIT_IN(&inp->ih, CODA_VGET, cred, p);
+    inp->Fid = *fid;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {
+	*VFid = outp->Fid;
+	*vtype = outp->vtype;
+    }
+
+    CODA_FREE(inp, coda_vget_size);
+    return error;
+}
--- /dev/null
+++ sys/fs/coda/coda_subr.c
@@ -0,0 +1,573 @@
+/*-
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_subr.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ */
+/*-
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.
+ */
+
+/* NOTES: rvb
+ * 1.	Added coda_unmounting to mark all cnodes as being UNMOUNTING.  This has to
+ *	 be done before dounmount is called.  Because some of the routines that
+ *	 dounmount calls before coda_unmounted might try to force flushes to venus.
+ *	 The vnode pager does this.
+ * 2.	coda_unmounting marks all cnodes scanning coda_cache.
+ * 3.	coda_checkunmounting (under DEBUG) checks all cnodes by chasing the vnodes
+ *	 under the /coda mount point.
+ * 4.	coda_cacheprint (under DEBUG) prints names with vnode/cnode address
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/coda/coda_subr.c,v 1.33 2007/07/12 21:04:57 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/mount.h>
+
+#include <fs/coda/coda.h>
+#include <fs/coda/cnode.h>
+#include <fs/coda/coda_subr.h>
+#include <fs/coda/coda_namecache.h>
+
+/*
+ * Statistics: coda_active is bumped on every successful coda_find();
+ * coda_reuse and coda_new count cnodes that coda_alloc() took from the free
+ * list and allocated fresh, respectively.
+ */
+int coda_active = 0;
+int coda_reuse = 0;
+int coda_new = 0;
+
+/* Recycled cnodes, chained through c_next; see coda_free()/coda_alloc(). */
+struct cnode *coda_freelist = NULL;
+/* Hash table of cnodes; each bucket is a chain through c_next. */
+struct cnode *coda_cache[CODA_CACHESIZE];
+
+#define	CNODE_NEXT(cp)	((cp)->c_next)
+
+/*
+ * coda_hash(fid) yields the coda_cache bucket for a fid.  Masking with
+ * CODA_CACHESIZE-1 assumes CODA_CACHESIZE is a power of two -- TODO confirm.
+ *
+ * IS_DIR() tests the low bit of the fid's vnode number.  Despite its
+ * parameter name it takes a fid, not a cnode; see the call in coda_flush().
+ */
+#ifdef CODA_COMPAT_5
+#define coda_hash(fid) \
+    (((fid)->Volume + (fid)->Vnode) & (CODA_CACHESIZE-1))
+#define IS_DIR(cnode)        (cnode.Vnode & 0x1)
+#else
+#define coda_hash(fid) (coda_f2i(fid) & (CODA_CACHESIZE-1))
+#define IS_DIR(cnode)        (cnode.opaque[2] & 0x1)
+#endif
+
+/*
+ * Hand out a zeroed cnode.
+ *
+ * A cnode is taken from the head of coda_freelist when one is available
+ * (counted in coda_reuse); otherwise a new one is obtained with CODA_ALLOC
+ * (counted in coda_new).  Either way the whole structure is cleared before
+ * it is returned.
+ */
+struct cnode *
+coda_alloc(void)
+{
+	struct cnode *node = coda_freelist;
+
+	if (node != NULL) {
+		coda_freelist = CNODE_NEXT(node);
+		coda_reuse++;
+	} else {
+		CODA_ALLOC(node, struct cnode *, sizeof(struct cnode));
+		/*
+		 * Historical hook for pager state; it expands to nothing.
+		 * The definition is kept because it stays visible to the
+		 * rest of the file.
+		 */
+#define VNODE_VM_INFO_INIT(vp)         /* MT */
+		VNODE_VM_INFO_INIT(CTOV(node));
+		coda_new++;
+	}
+	bzero(node, sizeof (struct cnode));
+
+	return (node);
+}
+
+/*
+ * Deallocate a cnode: push it on the head of coda_freelist so that
+ * coda_alloc() can reuse it.  The memory is not handed back to the
+ * allocator here.
+ */
+void
+coda_free(struct cnode *cp)
+{
+
+	CNODE_NEXT(cp) = coda_freelist;
+	coda_freelist = cp;
+}
+
+/*
+ * Put a cnode in the hash table: insert it at the head of the bucket
+ * selected by its fid.
+ */
+void
+coda_save(struct cnode *cp)
+{
+	/* Compute the bucket once; it is both read and written below. */
+	struct cnode **bucket = &coda_cache[coda_hash(&cp->c_fid)];
+
+	CNODE_NEXT(cp) = *bucket;
+	*bucket = cp;
+}
+
+/*
+ * Remove a cnode from the hash table.
+ *
+ * Searches the bucket selected by cp's fid for cp itself (pointer
+ * comparison), unlinks it and clears its c_next.  Does nothing when cp is
+ * not on that chain.
+ */
+void
+coda_unsave(struct cnode *cp)
+{
+	struct cnode **linkp;
+
+	/*
+	 * linkp always addresses the pointer that leads to the current
+	 * element (the bucket head or a predecessor's c_next), so unlinking
+	 * needs no special case for the first element.
+	 */
+	for (linkp = &coda_cache[coda_hash(&cp->c_fid)]; *linkp != NULL;
+	    linkp = &CNODE_NEXT(*linkp)) {
+		if (*linkp == cp) {
+			*linkp = CNODE_NEXT(cp);
+			CNODE_NEXT(cp) = NULL;
+			return;
+		}
+	}
+}
+
+/*
+ * Lookup a cnode by fid.  Cnodes that are being unmounted (IS_UNMOUNTING)
+ * are skipped.  Returns the first match on the fid's hash chain, after
+ * bumping coda_active, or NULL when there is none.
+ * NOTE: this allows multiple cnodes with same fid -- dcs 1/25/95
+ */
+struct cnode *
+coda_find(CodaFid *fid)
+{
+	struct cnode *cp;
+
+	for (cp = coda_cache[coda_hash(fid)]; cp != NULL;
+	    cp = CNODE_NEXT(cp)) {
+		if (coda_fid_eq(&(cp->c_fid), fid) && !IS_UNMOUNTING(cp)) {
+			coda_active++;
+			return (cp);
+		}
+	}
+	return (NULL);
+}
+
+/*
+ * coda_kill is called as a side effect to vcopen, to deal with cnodes left
+ * around from an earlier run of a venus or warden.  Since multiple
+ * venii/wardens can be running, only the cnodes of one mount are
+ * considered. -- DCS 12/1/94
+ *
+ * whoIam - mount whose cnodes are of interest.
+ * dcstat - passed through to coda_nc_flush().
+ *
+ * Returns the number of cnodes in coda_cache whose vnode belongs to whoIam.
+ *
+ * NOTE(review): older descriptions say the remaining cnodes are "marked
+ * dying".  This function sets no flag; it only flushes the name cache and
+ * counts.  Marking is done by coda_unmounting().
+ */
+int
+coda_kill(struct mount *whoIam, enum dc_status dcstat)
+{
+	int hash, count = 0;
+	struct cnode *cp;
+
+	/*
+	 * Algorithm is as follows:
+	 *     First, flush whatever vnodes we can from the name cache.
+	 *
+	 *     Then, step through whatever is left and count the cnodes
+	 *        that belong to this mount.
+	 */
+
+	/* This is slightly overkill, but should work. Eventually it'd be
+	 * nice to only flush those entries from the namecache that
+	 * reference a vnode in this vfs.  */
+	coda_nc_flush(dcstat);
+
+	for (hash = 0; hash < CODA_CACHESIZE; hash++) {
+		for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) {
+			if (CTOV(cp)->v_mount == whoIam) {
+#ifdef	DEBUG
+				printf("coda_kill: vp %p, cp %p\n", CTOV(cp), cp);
+#endif
+				count++;
+				CODADEBUG(CODA_FLUSH, 
+					  myprintf(("Live cnode fid %s flags %d count %d\n",
+						    coda_f2s(&cp->c_fid),
+						    cp->c_flags,
+						    vrefcnt(CTOV(cp)))); );
+			}
+		}
+	}
+	return count;
+}
+
+/*
+ * There are two reasons why a cnode may be in use, it may be in the
+ * name cache or it may be executing.
+ *
+ * Accounts the call under CODA_FLUSH in coda_clstat, flushes the name cache
+ * with coda_nc_flush(dcstat), then calls coda_vmflush() on every cnode in
+ * coda_cache that is not a directory.
+ */
+void
+coda_flush(enum dc_status dcstat)
+{
+	int hash;
+	struct cnode *cp;
+
+	coda_clstat.ncalls++;
+	coda_clstat.reqs[CODA_FLUSH]++;
+
+	coda_nc_flush(dcstat);	    /* flush files from the name cache */
+
+	for (hash = 0; hash < CODA_CACHESIZE; hash++) {
+		for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) {
+			if (!IS_DIR(cp->c_fid)) /* only files can be executed */
+				coda_vmflush(cp);
+		}
+	}
+}
+
+/*
+ * Debugging aid: report, through myprintf, the fid and vnode use count of
+ * every cnode still present in coda_cache, i.e. every cnode that lived
+ * through a name cache flush.
+ */
+void
+coda_testflush(void)
+{
+	struct cnode *node;
+	int bucket = 0;
+
+	while (bucket < CODA_CACHESIZE) {
+		node = coda_cache[bucket];
+		while (node != NULL) {
+			myprintf(("Live cnode fid %s count %d\n",
+			    coda_f2s(&node->c_fid), CTOV(node)->v_usecount));
+			node = CNODE_NEXT(node);
+		}
+		bucket++;
+	}
+}
+
+/*
+ * Step through all cnodes and mark those belonging to mount `whoIam' as
+ * unmounting (C_UNMOUNTING).  The kernel may try to fsync them now that
+ * venus is dead, which would be a bad thing.
+ *
+ * A cnode found with C_LOCKED or C_WANTED set has both flags cleared and
+ * its sleepers woken first; this is reported on the console.
+ */
+void
+coda_unmounting(struct mount *whoIam)
+{
+	int hash;
+	struct cnode *cp;
+
+	for (hash = 0; hash < CODA_CACHESIZE; hash++) {
+		for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) {
+			if (CTOV(cp)->v_mount == whoIam) {
+				if (cp->c_flags & (C_LOCKED|C_WANTED)) {
+					printf("coda_unmounting: Unlocking %p\n", cp);
+					cp->c_flags &= ~(C_LOCKED|C_WANTED);
+					wakeup((caddr_t) cp);
+				}
+				cp->c_flags |= C_UNMOUNTING;
+			}
+		}
+	}
+}
+
+#ifdef	DEBUG
+/*
+ * DEBUG only: walk the vnodes of mount `mp' and make sure each cnode
+ * carries C_UNMOUNTING.  A cnode found without the flag is reported on the
+ * console and then flagged.  Doomed vnodes (VI_DOOMED) are skipped.
+ *
+ * `count' and `bad' are maintained but never reported; they are kept as in
+ * the original, presumably for inspection from a debugger.
+ */
+void
+coda_checkunmounting(struct mount *mp)
+{
+	struct vnode *vp, *nvp;
+	struct cnode *cp;
+	int count = 0, bad = 0;
+
+	MNT_ILOCK(mp);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
+		VI_LOCK(vp);
+		if (vp->v_iflag & VI_DOOMED) {
+			VI_UNLOCK(vp);
+			continue;
+		}
+		cp = VTOC(vp);
+		count++;
+		if (!(cp->c_flags & C_UNMOUNTING)) {
+			bad++;
+			printf("vp %p, cp %p missed\n", vp, cp);
+			cp->c_flags |= C_UNMOUNTING;
+		}
+		VI_UNLOCK(vp);
+	}
+	MNT_IUNLOCK(mp);
+}
+
+/*
+ * DEBUG only: print the control vnode, then every cnode in coda_cache whose
+ * vnode belongs to mount `whoIam', each followed by the output of
+ * coda_nc_name(), and finally the number of cnodes printed.
+ */
+void
+coda_cacheprint(struct mount *whoIam)
+{
+	int hash;
+	struct cnode *cp;
+	int count = 0;
+
+	printf("coda_cacheprint: coda_ctlvp %p, cp %p", coda_ctlvp, VTOC(coda_ctlvp));
+	coda_nc_name(VTOC(coda_ctlvp));
+	printf("\n");
+
+	for (hash = 0; hash < CODA_CACHESIZE; hash++) {
+		for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) {
+			if (CTOV(cp)->v_mount == whoIam) {
+				printf("coda_cacheprint: vp %p, cp %p", CTOV(cp), cp);
+				coda_nc_name(cp);
+				printf("\n");
+				count++;
+			}
+		}
+	}
+	printf("coda_cacheprint: count %d\n", count);
+}
+#endif
+
+/*
+ * There are 6 cases where invalidations occur. The semantics of each
+ * is listed here.
+ *
+ * CODA_FLUSH     -- flush all entries from the name cache and the cnode cache.
+ * CODA_PURGEUSER -- flush all entries from the name cache for a specific user
+ *                  This call is a result of token expiration.
+ *
+ * The next two are the result of callbacks on a file or directory.
+ * CODA_ZAPDIR    -- flush the attributes for the dir from its cnode.
+ *                  Zap all children of this directory from the namecache.
+ * CODA_ZAPFILE   -- flush the attributes for a file.
+ *
+ * The fifth is a result of Venus detecting an inconsistent file.
+ * CODA_PURGEFID  -- flush the attribute for the file
+ *                  If it is a dir (odd vnode), purge its 
+ *                  children from the namecache
+ *                  remove the file from the namecache.
+ *
+ * The sixth allows Venus to replace local fids with global ones
+ * during reintegration.
+ *
+ * CODA_REPLACE -- replace one CodaFid with another throughout the name cache 
+ */
+
+/*
+ * Dispatch a downcall (invalidation request) to the kernel caches.
+ *
+ * opcode selects the request; out carries the fid(s) or user id that the
+ * request applies to.
+ *
+ * Returns 0 on success, whatever coda_vmflush() returned for
+ * CODA_ZAPFILE and CODA_PURGEFID, or EINVAL for an opcode that is not
+ * handled here.
+ *
+ * The per-fid cases all follow the same shape: look the cnode up with
+ * coda_find(), hold it with vref() while working on it, mark it
+ * C_PURGING if the reference just taken is the only one, and vrele() it.
+ */
+int handleDownCall(opcode, out)
+     int opcode; union outputArgs *out;
+{
+    int error;
+
+    /* Handle invalidate requests. */
+    switch (opcode) {
+      case CODA_FLUSH : {
+
+	  coda_flush(IS_DOWNCALL);
+	  
+	  CODADEBUG(CODA_FLUSH,coda_testflush();)    /* print remaining cnodes */
+	  return(0);
+      }
+	
+      case CODA_PURGEUSER : {
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_PURGEUSER]++;
+	  
+	  /* XXX - need to prevent fsync's */
+#ifdef CODA_COMPAT_5
+	  coda_nc_purge_user(out->coda_purgeuser.cred.cr_uid, IS_DOWNCALL);
+#else
+	  coda_nc_purge_user(out->coda_purgeuser.uid, IS_DOWNCALL);
+#endif
+	  return(0);
+      }
+	
+      case CODA_ZAPFILE : {
+	  struct cnode *cp;
+
+	  error = 0;
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_ZAPFILE]++;
+	  
+	  cp = coda_find(&out->coda_zapfile.Fid);
+	  if (cp != NULL) {
+	      vref(CTOV(cp));
+	      
+	      /* Drop the cached attributes. */
+	      cp->c_flags &= ~C_VATTR;
+	      ASSERT_VOP_LOCKED(CTOV(cp), "coda HandleDownCall");
+	      if (CTOV(cp)->v_vflag & VV_TEXT)
+		  error = coda_vmflush(cp);
+	      CODADEBUG(CODA_ZAPFILE, 
+			myprintf(("zapfile: fid = %s, refcnt = %d, error = %d\n",
+				  coda_f2s(&cp->c_fid), CTOV(cp)->v_usecount - 1, error)););
+	      if (vrefcnt(CTOV(cp)) == 1) {
+		  cp->c_flags |= C_PURGING;
+	      }
+	      vrele(CTOV(cp));
+	  }
+	  
+	  return(error);
+      }
+	
+      case CODA_ZAPDIR : {
+	  struct cnode *cp;
+
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_ZAPDIR]++;
+	  
+	  cp = coda_find(&out->coda_zapdir.Fid);
+	  if (cp != NULL) {
+	      vref(CTOV(cp));
+	      
+	      /* Drop the cached attributes and the children's name cache entries. */
+	      cp->c_flags &= ~C_VATTR;
+	      coda_nc_zapParentfid(&out->coda_zapdir.Fid, IS_DOWNCALL);
+
+	      CODADEBUG(CODA_ZAPDIR, myprintf((
+		  "zapdir: fid = %s, refcnt = %d\n",
+		  coda_f2s(&cp->c_fid), CTOV(cp)->v_usecount - 1)););
+	      if (vrefcnt(CTOV(cp)) == 1) {
+		  cp->c_flags |= C_PURGING;
+	      }
+	      vrele(CTOV(cp));
+	  }
+	  
+	  return(0);
+      }
+	
+      case CODA_PURGEFID : {
+	  struct cnode *cp;
+
+	  error = 0;
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_PURGEFID]++;
+
+	  cp = coda_find(&out->coda_purgefid.Fid);
+	  if (cp != NULL) {
+	      vref(CTOV(cp));
+	      if (IS_DIR(out->coda_purgefid.Fid)) { /* Vnode is a directory */
+		      coda_nc_zapParentfid(&out->coda_purgefid.Fid,IS_DOWNCALL);     
+	      }
+	      cp->c_flags &= ~C_VATTR;
+	      coda_nc_zapfid(&out->coda_purgefid.Fid, IS_DOWNCALL);
+	      ASSERT_VOP_LOCKED(CTOV(cp), "coda HandleDownCall");
+	      if (!(IS_DIR(out->coda_purgefid.Fid)) 
+		  && (CTOV(cp)->v_vflag & VV_TEXT)) {
+		  
+		  error = coda_vmflush(cp);
+	      }
+	      CODADEBUG(CODA_PURGEFID, myprintf((
+			 "purgefid: fid = %s, refcnt = %d, error = %d\n",
+			 coda_f2s(&cp->c_fid), CTOV(cp)->v_usecount - 1, error)););
+	      if (vrefcnt(CTOV(cp)) == 1) {
+		  cp->c_flags |= C_PURGING;
+	      }
+	      vrele(CTOV(cp));
+	  }
+	  return(error);
+      }
+
+      case CODA_REPLACE : {
+	  struct cnode *cp = NULL;
+
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_REPLACE]++;
+	  
+	  cp = coda_find(&out->coda_replace.OldFid);
+	  if (cp != NULL) { 
+	      /* remove the cnode from the hash table, replace the fid, and reinsert */
+	      vref(CTOV(cp));
+	      coda_unsave(cp);
+	      cp->c_fid = out->coda_replace.NewFid;
+	      coda_save(cp);
+
+	      CODADEBUG(CODA_REPLACE, myprintf((
+			"replace: oldfid = %s, newfid = %s, cp = %p\n",
+			coda_f2s(&out->coda_replace.OldFid),
+			coda_f2s(&cp->c_fid), cp));)
+	      vrele(CTOV(cp));
+	  }
+	  return (0);
+      }
+      default:
+      	myprintf(("handleDownCall: unknown opcode %d\n", opcode));
+	return (EINVAL);
+    }
+}
+
+/* coda_grab_vnode: lives in either cfs_mach.c or cfs_nbsd.c */
+
+/*
+ * Placeholder for flushing VM state associated with a cnode.  This
+ * implementation does nothing with `cp' and always reports success (0).
+ */
+int
+coda_vmflush(cp)
+     struct cnode *cp;
+{
+    return 0;
+}
+
+
+/*
+ * Kernel-internal debugging switches.
+ *
+ * coda_debugon: set both debug masks to all-ones and enable entry
+ * tracing for the vnode, psdev and vfs operation layers.
+ */
+void
+coda_debugon(void)
+{
+    /* entry tracing */
+    coda_vfsop_print_entry = 1;
+    coda_psdev_print_entry = 1;
+    coda_vnop_print_entry = 1;
+
+    /* debug masks */
+    coda_nc_debug = -1;
+    codadebug = -1;
+}
+
+/*
+ * coda_debugoff: clear both debug masks and disable entry tracing for
+ * the vnode, psdev and vfs operation layers.
+ */
+void
+coda_debugoff(void)
+{
+    /* entry tracing */
+    coda_vfsop_print_entry = 0;
+    coda_psdev_print_entry = 0;
+    coda_vnop_print_entry = 0;
+
+    /* debug masks */
+    coda_nc_debug = 0;
+    codadebug = 0;
+}
+
+/*
+ * Utilities used by both client and server
+ * Standard levels:
+ * 0) no debugging
+ * 1) hard failures
+ * 2) soft failures
+ * 3) current test software
+ * 4) main procedure entry points
+ * 5) main procedure exit points
+ * 6) utility procedure entry points
+ * 7) utility procedure exit points
+ * 8) obscure procedure entry points
+ * 9) obscure procedure exit points
+ * 10) random stuff
+ * 11) all <= 1
+ * 12) all <= 2
+ * 13) all <= 3
+ * ...
+ */
--- /dev/null
+++ sys/fs/coda/coda_vnops.c
@@ -0,0 +1,1735 @@
+/*-
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ *  	@(#) src/sys/coda/coda_vnops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/fs/coda/coda_vnops.c,v 1.76.4.1 2008/01/23 12:09:43 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/acct.h>
+#include <sys/errno.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/file.h>		/* Must come after sys/malloc.h */
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/unistd.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+
+#include <fs/coda/coda.h>
+#include <fs/coda/cnode.h>
+#include <fs/coda/coda_vnops.h>
+#include <fs/coda/coda_venus.h>
+#include <fs/coda/coda_opstats.h>
+#include <fs/coda/coda_subr.h>
+#include <fs/coda/coda_namecache.h>
+#include <fs/coda/coda_pioctl.h>
+
+/* 
+ * These flags select various performance enhancements.
+ * Each one is consulted by the vnode operations below; a non-zero value
+ * enables the corresponding kernel-side cache.
+ */
+int coda_attr_cache  = 1;       /* Set to cache attributes in the kernel */
+int coda_symlink_cache = 1;     /* Set to cache symbolic link information */
+int coda_access_cache = 1;      /* Set to handle some access checks directly */
+
+/* structure to keep track of vfs calls: one counter set per vnode operation */
+
+struct coda_op_stats coda_vnodeopstats[CODA_VNODEOPS_SIZE];
+
+/* Bump one of the per-operation counters for operation index `op'. */
+#define MARK_ENTRY(op) (coda_vnodeopstats[op].entries++)
+#define MARK_INT_SAT(op) (coda_vnodeopstats[op].sat_intrn++)
+#define MARK_INT_FAIL(op) (coda_vnodeopstats[op].unsat_intrn++)
+#define MARK_INT_GEN(op) (coda_vnodeopstats[op].gen_intrn++)
+
+/* Delay applied around debugging printfs */
+int coda_printf_delay = 0;  /* in microseconds */
+/* Non-zero makes ENTRY (below) announce each vnode operation it is used in. */
+int coda_vnop_print_entry = 0;
+static int coda_lockdebug = 0;
+
+/*
+ * Some NetBSD details:
+ * 
+ *   coda_start is called at the end of the mount syscall.
+ *   coda_init is called at boot time.
+ */
+
+/* Print "Entered <function>" when coda_vnop_print_entry is set. */
+#define ENTRY  if(coda_vnop_print_entry) myprintf(("Entered %s\n",__func__))
+
+/*
+ * Definition of the vnode operation vector.
+ *
+ * Operations not listed here fall through to default_vnodeops via
+ * .vop_default.  The poll, getpages, putpages and getwritemount slots use
+ * the vop_std* implementations, and print is VOP_NULL.
+ */
+
+struct vop_vector coda_vnodeops = {
+    .vop_default = &default_vnodeops,
+    .vop_lookup = coda_lookup,		/* lookup */
+    .vop_create = coda_create,		/* create */
+    .vop_open = coda_open,		/* open */
+    .vop_close = coda_close,		/* close */
+    .vop_access = coda_access,		/* access */
+    .vop_getattr = coda_getattr,	/* getattr */
+    .vop_setattr = coda_setattr,	/* setattr */
+    .vop_read = coda_read,		/* read */
+    .vop_write = coda_write,		/* write */
+    .vop_ioctl = coda_ioctl,		/* ioctl */
+    .vop_fsync = coda_fsync,		/* fsync */
+    .vop_remove = coda_remove,		/* remove */
+    .vop_link = coda_link,		/* link */
+    .vop_rename = coda_rename,		/* rename */
+    .vop_mkdir = coda_mkdir,		/* mkdir */
+    .vop_rmdir = coda_rmdir,		/* rmdir */
+    .vop_symlink = coda_symlink,	/* symlink */
+    .vop_readdir = coda_readdir,	/* readdir */
+    .vop_readlink = coda_readlink,	/* readlink */
+    .vop_inactive = coda_inactive,	/* inactive */
+    .vop_reclaim = coda_reclaim,	/* reclaim */
+    .vop_lock1 = coda_lock,		/* lock */
+    .vop_unlock = coda_unlock,		/* unlock */
+    .vop_bmap = coda_bmap,		/* bmap */
+    .vop_print = VOP_NULL,		/* print */
+    .vop_islocked = coda_islocked,	/* islocked */
+    .vop_pathconf = coda_pathconf,	/* pathconf */
+    .vop_poll = vop_stdpoll,
+    .vop_getpages = vop_stdgetpages,	/* pager intf.*/
+    .vop_putpages = vop_stdputpages,	/* pager intf.*/
+    .vop_getwritemount =	vop_stdgetwritemount,
+
+/* Compiled out: operations noted as missing from this vector. */
+#if 0
+    missing
+    .vop_cachedlookup =	ufs_lookup,
+    .vop_whiteout =	ufs_whiteout,
+#endif
+
+};
+
+/*
+ * Generic do-nothing vnode operation (for lease_check, advlock).
+ *
+ * `anon' is treated as a pointer to a vnodeop_desc pointer; when
+ * codadebug is non-zero the name of the unsupported operation is logged.
+ * Always returns 0.
+ */
+int
+coda_vop_nop(void *anon)
+{
+	struct vnodeop_desc **descp;
+
+	descp = (struct vnodeop_desc **)anon;
+	if (codadebug != 0) {
+		myprintf(("Vnode operation %s called, but unsupported\n",
+		    (*descp)->vdesc_name));
+	}
+	return (0);
+}
+
+/*
+ * Reset the per-vnode-operation statistics table: every slot gets its
+ * own index as opcode and all four counters zeroed.  Always returns 0.
+ */
+int
+coda_vnodeopstats_init(void)
+{
+	struct coda_op_stats *slot;
+	int op;
+
+	for (op = 0; op < CODA_VNODEOPS_SIZE; op++) {
+		slot = &coda_vnodeopstats[op];
+		slot->opcode = op;
+		slot->entries = 0;
+		slot->sat_intrn = 0;
+		slot->unsat_intrn = 0;
+		slot->gen_intrn = 0;
+	}
+	return (0);
+}
+		
+/* 
+ * coda_open calls Venus, which returns an open file descriptor for the
+ * cache file holding the data. We get the vnode while we are still in the
+ * context of the venus process in coda_psdev.c. This vnode is then
+ * passed back to the caller and opened.
+ *
+ * Returns 0 on success, EACCES for a write/truncate/create open of the
+ * control file, or the error from venus_open() or from VOP_OPEN() on the
+ * container vnode.
+ */
+int
+coda_open(struct vop_open_args *ap)
+{
+    /* 
+     * NetBSD can pass the O_EXCL flag in mode, even though the check
+     * has already happened.  Venus defensively assumes that if open
+     * is passed the EXCL, it must be a bug.  We strip the flag here.
+     */
+/* true args */
+    register struct vnode **vpp = &(ap->a_vp);
+    struct cnode *cp = VTOC(*vpp);
+    int flag = ap->a_mode & (~O_EXCL);
+    struct ucred *cred = ap->a_cred;
+    struct thread *td = ap->a_td;
+/* locals */
+    int error;
+    struct vnode *vp;		/* container (cache file) vnode from venus */
+
+    MARK_ENTRY(CODA_OPEN_STATS);
+
+    /* Check for open of control file. */
+    if (IS_CTL_VP(*vpp)) {
+	/* XXX */
+	/* if (WRITEABLE(flag)) */ 
+	if (flag & (FWRITE | O_TRUNC | O_CREAT | O_EXCL)) {
+	    MARK_INT_FAIL(CODA_OPEN_STATS);
+	    return(EACCES);
+	}
+	MARK_INT_SAT(CODA_OPEN_STATS);
+	return(0);
+    }
+
+    error = venus_open(vtomi((*vpp)), &cp->c_fid, flag, cred, td->td_proc, &vp);
+    if (error)
+	return (error);
+
+    CODADEBUG( CODA_OPEN,myprintf(("open: vp %p result %d\n", vp, error));)
+
+    /* Save the vnode pointer for the cache file. */
+    if (cp->c_ovp == NULL) {
+	cp->c_ovp = vp;
+    } else {
+	/* A cnode that is already open must keep the same container. */
+	if (cp->c_ovp != vp)
+	    panic("coda_open:  cp->c_ovp != ITOV(ip)");
+    }
+    cp->c_ocount++;
+
+    /* Flush the attribute cached if writing the file. */
+    if (flag & FWRITE) {
+	cp->c_owrite++;
+	cp->c_flags &= ~C_VATTR;
+    }
+
+    /* Open the cache file. */
+    /*
+     * NOTE(review): on failure c_ovp, c_ocount and c_owrite keep the
+     * values set above -- confirm whether callers compensate.
+     */
+    error = VOP_OPEN(vp, flag, cred, td, NULL); 
+    if (error) {
+    	printf("coda_open: VOP_OPEN on container failed %d\n", error);
+	return (error);
+    } else {
+	/* Share the container's VM object with the coda vnode. */
+	(*vpp)->v_object = vp->v_object;
+    }
+/* grab (above) does this when it calls newvnode unless it's in the cache*/
+
+    return(error);
+}
+
+/*
+ * Close the cache file used for I/O and notify Venus.
+ *
+ * Returns 0 for the control file, ENODEV when the cnode is unmounting,
+ * and otherwise the result of venus_close().
+ */
+int
+coda_close(struct vop_close_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    int flag = ap->a_fflag;
+    struct ucred *cred = ap->a_cred;
+    struct thread *td = ap->a_td;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_CLOSE_STATS);
+
+    /* Check for close of control file. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_SAT(CODA_CLOSE_STATS);
+	return(0);
+    }
+
+    /* Close the container vnode and drop a reference on it. */
+    if (cp->c_ovp) {
+	VOP_CLOSE(cp->c_ovp, flag, cred, td); /* Do errors matter here? */
+	vrele(cp->c_ovp);
+    }
+#ifdef CODA_VERBOSE
+    else printf("coda_close: NO container vp %p/cp %p\n", vp, cp);
+#endif
+
+    /* Forget the container once the last open is gone. */
+    if (--cp->c_ocount == 0)
+	cp->c_ovp = NULL;
+
+    if (flag & FWRITE)                    /* file was opened for write */
+	--cp->c_owrite;
+
+    /* Venus is only told about the close while the mount is still alive. */
+    if (!IS_UNMOUNTING(cp))
+         error = venus_close(vtomi(vp), &cp->c_fid, flag, cred, td->td_proc);
+    else error = ENODEV;
+
+    CODADEBUG(CODA_CLOSE, myprintf(("close: result %d\n",error)); )
+    return(error);
+}
+
+/*
+ * VOP_READ entry point: hand the request to coda_rdwr() as a UIO_READ,
+ * using the thread recorded in the uio.
+ */
+int
+coda_read(struct vop_read_args *ap)
+{
+    struct uio *uio = ap->a_uio;
+
+    ENTRY;
+    return (coda_rdwr(ap->a_vp, uio, UIO_READ, ap->a_ioflag, ap->a_cred,
+	uio->uio_td));
+}
+
+/*
+ * VOP_WRITE entry point: hand the request to coda_rdwr() as a UIO_WRITE,
+ * using the thread recorded in the uio.
+ */
+int
+coda_write(struct vop_write_args *ap)
+{
+    struct uio *uio = ap->a_uio;
+
+    ENTRY;
+    return (coda_rdwr(ap->a_vp, uio, UIO_WRITE, ap->a_ioflag, ap->a_cred,
+	uio->uio_td));
+}
+
+/*
+ * Common read/write path: perform the I/O on the container (cache) file
+ * of the coda vnode `vp'.
+ *
+ * rw selects UIO_READ or UIO_WRITE; uiop, ioflag and cred are passed on
+ * to VOP_READ/VOP_WRITE on the container vnode.  If the cnode has no
+ * container vnode yet, the file is opened through VOP_OPEN() for the
+ * duration of the call and closed again afterwards.
+ *
+ * Returns EINVAL for the control object, the VOP_OPEN() error if the
+ * internal open fails, and otherwise the result of the container
+ * read/write.
+ */
+int
+coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw, int ioflag,
+    struct ucred *cred, struct thread *td)
+{ 
+/* upcall decl */
+  /* NOTE: container file operation!!! */
+/* locals */
+    struct cnode *cp = VTOC(vp);
+    struct vnode *cfvp = cp->c_ovp;
+    int opened_internally = 0;
+    int error = 0;
+
+    MARK_ENTRY(CODA_RDWR_STATS);
+
+    CODADEBUG(CODA_RDWR, myprintf(("coda_rdwr(%d, %p, %d, %lld, %d)\n", rw, 
+			      (void *)uiop->uio_iov->iov_base, uiop->uio_resid, 
+			      (long long)uiop->uio_offset, uiop->uio_segflg)); )
+	
+    /* Check for rdwr of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_RDWR_STATS);
+	return(EINVAL);
+    }
+
+    /* 
+     * If file is not already open this must be a page {read,write} request
+     * and we should open it internally.
+     */
+    if (cfvp == NULL) {
+	opened_internally = 1;
+	MARK_INT_GEN(CODA_OPEN_STATS);
+	error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, td, NULL);
+#ifdef CODA_VERBOSE
+	printf("coda_rdwr: Internally Opening %p\n", vp);
+#endif
+	if (error) {
+		printf("coda_rdwr: VOP_OPEN on container failed %d\n", error);
+		return (error);
+	}
+	/* Pick up the container vnode saved on the cnode by the open. */
+	cfvp = cp->c_ovp;
+    }
+
+    /* Have UFS handle the call. */
+    CODADEBUG(CODA_RDWR, myprintf(("indirect rdwr: fid = %s, refcnt = %d\n",
+			     coda_f2s(&cp->c_fid), CTOV(cp)->v_usecount)); )
+    if (rw == UIO_READ) {
+	error = VOP_READ(cfvp, uiop, ioflag, cred);
+    } else {
+	error = VOP_WRITE(cfvp, uiop, ioflag, cred);
+	/* ufs_write updates the vnode_pager_setsize for the vnode/object */
+
+	/* Mirror the container's new size onto the coda vnode's pager. */
+	{   struct vattr attr;
+
+	    if (VOP_GETATTR(cfvp, &attr, cred, td) == 0) {
+		vnode_pager_setsize(vp, attr.va_size);
+	    }
+	}
+    }
+
+    if (error)
+	MARK_INT_FAIL(CODA_RDWR_STATS);
+    else
+	MARK_INT_SAT(CODA_RDWR_STATS);
+
+    /* Do an internal close if necessary. */
+    if (opened_internally) {
+	MARK_INT_GEN(CODA_CLOSE_STATS);
+	(void)VOP_CLOSE(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, td);
+    }
+
+    /* Invalidate cached attributes if writing. */
+    if (rw == UIO_WRITE)
+	cp->c_flags &= ~C_VATTR;
+    return(error);
+}
+
+
+
+/*
+ * ioctl on the coda control object.
+ *
+ * a_data is interpreted as a struct PioctlData: its path is looked up
+ * with namei(), the resulting vnode must be a coda vnode, and the
+ * request is then forwarded to Venus for that vnode's fid.
+ *
+ * Returns EOPNOTSUPP if `vp' is not the control vnode, the namei()
+ * error if the lookup fails, EINVAL if the target is not a coda vnode
+ * or the input size exceeds VC_MAXDATASIZE, and otherwise the result of
+ * venus_ioctl().
+ */
+int
+coda_ioctl(struct vop_ioctl_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    int com = ap->a_command;
+    caddr_t data = ap->a_data;
+    int flag = ap->a_fflag;
+    struct ucred *cred = ap->a_cred;
+    struct thread *td = ap->a_td;
+/* locals */
+    int error;
+    struct vnode *tvp;		/* vnode the pioctl path resolves to */
+    struct nameidata ndp;
+    struct PioctlData *iap = (struct PioctlData *)data;
+
+    MARK_ENTRY(CODA_IOCTL_STATS);
+
+    /*
+     * NOTE(review): iap->path is handed to namei() below as a
+     * UIO_USERSPACE address, yet the debug messages format it with %s --
+     * confirm that is safe when debugging is enabled.
+     */
+    CODADEBUG(CODA_IOCTL, myprintf(("in coda_ioctl on %s\n", iap->path));)
+	
+    /* Don't check for operation on a dying object, for ctlvp it
+       shouldn't matter */
+	
+    /* Must be control object to succeed. */
+    if (!IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: vp != ctlvp"));)
+	    return (EOPNOTSUPP);
+    }
+    /* Look up the pathname. */
+
+    /* Should we use the name cache here? It would get it from
+       lookupname sooner or later anyway, right? */
+
+    NDINIT(&ndp, LOOKUP, (iap->follow ? FOLLOW : NOFOLLOW), UIO_USERSPACE, iap->path, td);
+    error = namei(&ndp);
+    tvp = ndp.ni_vp;
+
+    if (error) {
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: lookup returns %d\n",
+				   error));)
+	return(error);
+    }
+
+    /* 
+     * Make sure this is a coda style cnode, but it may be a
+     * different vfsp 
+     */
+    if (tvp->v_op != &coda_vnodeops) {
+	vrele(tvp);
+	NDFREE(&ndp, NDF_ONLY_PNBUF);
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+	CODADEBUG(CODA_IOCTL, 
+		 myprintf(("coda_ioctl error: %s not a coda object\n", 
+			iap->path));)
+	return(EINVAL);
+    }
+
+    /* Refuse requests whose input would not fit in a Venus message. */
+    if (iap->vi.in_size > VC_MAXDATASIZE) {
+	NDFREE(&ndp, 0);
+	return(EINVAL);
+    }
+    error = venus_ioctl(vtomi(tvp), &((VTOC(tvp))->c_fid), com, flag, data, cred, td->td_proc);
+
+    if (error)
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+    else
+	CODADEBUG(CODA_IOCTL, myprintf(("Ioctl returns %d \n", error)); )
+
+    /* Release the looked-up vnode and the pathname buffer. */
+    vrele(tvp);
+    NDFREE(&ndp, NDF_ONLY_PNBUF);
+    return(error);
+}
+
+/*
+ * To reduce the cost of a user-level venus, we cache attributes in
+ * the kernel.  Each cnode has storage allocated for an attribute. If
+ * c_vattr is valid, return a copy of it. Otherwise, get the
+ * attributes from venus and store them in the cnode.  There is some
+ * question if this method is a security leak. But I think that in
+ * order to make this call, the user must have done a lookup and
+ * opened the file, and therefore should already have access.  
+ *
+ * Returns ENODEV while the cnode is unmounting, ENOENT for the control
+ * object, 0 on a cache hit, and otherwise the result of venus_getattr().
+ */
+int
+coda_getattr(struct vop_getattr_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct vattr *vap = ap->a_vap;
+    struct ucred *cred = ap->a_cred;
+    struct thread *td = ap->a_td;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_GETATTR_STATS);
+
+    if (IS_UNMOUNTING(cp))
+	return ENODEV;
+
+    /* Check for getattr of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_GETATTR_STATS);
+	return(ENOENT);
+    }
+
+    /* Check to see if the attributes have already been cached */
+    if (VALID_VATTR(cp)) { 
+	CODADEBUG(CODA_GETATTR, { myprintf(("attr cache hit: %s\n",
+					coda_f2s(&cp->c_fid)));});
+	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
+		 print_vattr(&cp->c_vattr); );
+	
+	*vap = cp->c_vattr;
+	MARK_INT_SAT(CODA_GETATTR_STATS);
+	return(0);
+    }
+
+    error = venus_getattr(vtomi(vp), &cp->c_fid, cred, td->td_proc, vap);
+
+    if (!error) {
+	CODADEBUG(CODA_GETATTR, myprintf(("getattr miss %s: result %d\n",
+				     coda_f2s(&cp->c_fid), error)); )	       
+	    
+	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
+		 print_vattr(vap);	);
+	
+	/*
+	 * Tell the container file's pager about the size venus reported.
+	 * va_size is passed through at full width: squeezing it into an
+	 * int first would mangle sizes of 2GB and above.
+	 */
+	if (cp->c_ovp != NULL)
+	    vnode_pager_setsize(cp->c_ovp, vap->va_size);
+
+	/* If not open for write, store attributes in cnode */   
+	if ((cp->c_owrite == 0) && (coda_attr_cache)) {  
+	    cp->c_vattr = *vap;
+	    cp->c_flags |= C_VATTR; 
+	}
+	
+    }
+    return(error);
+}
+
+/*
+ * Set attributes on a coda vnode by forwarding the request to Venus.
+ *
+ * On success the kernel's cached attributes for the cnode are
+ * invalidated.  If the caller supplied a size and the cnode has a
+ * container file, the container's pager is told about the new size.
+ *
+ * Returns ENOENT for the control object, otherwise the result of
+ * venus_setattr().
+ */
+int
+coda_setattr(struct vop_setattr_args *ap)
+{
+/* true args */
+    register struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    register struct vattr *vap = ap->a_vap;
+    struct ucred *cred = ap->a_cred;
+    struct thread *td = ap->a_td;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_SETATTR_STATS);
+
+    /* Check for setattr of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_SETATTR_STATS);
+	return(ENOENT);
+    }
+
+    if (codadebug & CODADBGMSK(CODA_SETATTR)) {
+	print_vattr(vap);
+    }
+    error = venus_setattr(vtomi(vp), &cp->c_fid, vap, cred, td->td_proc);
+
+    if (!error)
+	cp->c_flags &= ~C_VATTR;
+
+    /*
+     * Compare and pass va_size at full width.  Going through an int
+     * would truncate sizes of 2GB and above, and could make a real
+     * size look like VNOVAL.
+     */
+    if (vap->va_size != VNOVAL && cp->c_ovp != NULL)
+	vnode_pager_setsize(cp->c_ovp, vap->va_size);
+
+    CODADEBUG(CODA_SETATTR,	myprintf(("setattr %d\n", error)); )
+    return(error);
+}
+
+/*
+ * Access check for a coda vnode.
+ *
+ * The control object allows read-only access and nothing else.  For a
+ * directory being checked for exec (lookup) permission, a "." hit in the
+ * name cache for these credentials is taken as proof of access when
+ * coda_access_cache is set.  Everything else is decided by Venus.
+ */
+int
+coda_access(struct vop_access_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    int mode = ap->a_mode;
+    struct ucred *cred = ap->a_cred;
+    struct thread *td = ap->a_td;
+
+    MARK_ENTRY(CODA_ACCESS_STATS);
+
+    /* Control object: only plain read access is allowed. */
+    if (IS_CTL_VP(vp)) {
+	/* bogus hack - all will be marked as successes */
+	MARK_INT_SAT(CODA_ACCESS_STATS);
+	if ((mode & VREAD) && !(mode & (VWRITE | VEXEC)))
+	    return (0);
+	return (EACCES);
+    }
+
+    /*
+     * A directory that is in the name cache for this user has already
+     * been looked up successfully, so lookup access can be granted
+     * without asking Venus.
+     */
+    if (coda_access_cache && (vp->v_type == VDIR) && (mode & VEXEC) &&
+	coda_nc_lookup(cp, ".", 1, cred)) {
+	MARK_INT_SAT(CODA_ACCESS_STATS);
+	return (0);
+    }
+
+    return (venus_access(vtomi(vp), &cp->c_fid, mode, cred, td->td_proc));
+}
+
+/*
+ * Read the target of a symbolic link into the caller's uio.
+ *
+ * A link target cached on the cnode is served directly when
+ * coda_symlink_cache is set.  Otherwise the target is fetched from Venus
+ * and copied out; the returned buffer is then either kept on the cnode
+ * as the cached target or freed.
+ *
+ * Returns ENOENT for the control object, otherwise the venus_readlink()
+ * or uiomove() result.
+ */
+int
+coda_readlink(struct vop_readlink_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct uio *uiop = ap->a_uio;
+    struct ucred *cred = ap->a_cred;
+    struct thread *td = ap->a_uio->uio_td;
+/* locals */
+    int error;
+    char *str;			/* link target returned by venus */
+    int len;			/* its length */
+
+    MARK_ENTRY(CODA_READLINK_STATS);
+
+    /* Check for readlink of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_READLINK_STATS);
+	return(ENOENT);
+    }
+
+    if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) { /* symlink was cached */
+	uiop->uio_rw = UIO_READ;
+	error = uiomove(cp->c_symlink, (int)cp->c_symlen, uiop);
+	if (error)
+	    MARK_INT_FAIL(CODA_READLINK_STATS);
+	else
+	    MARK_INT_SAT(CODA_READLINK_STATS);
+	return(error);
+    }
+
+    /* The uio may carry no thread; pass a NULL proc to venus in that case. */
+    error = venus_readlink(vtomi(vp), &cp->c_fid, cred,
+        td != NULL ? td->td_proc : NULL, &str, &len);
+
+    if (!error) {
+	uiop->uio_rw = UIO_READ;
+	error = uiomove(str, len, uiop);
+
+	/* The buffer is cached even if the copy-out above failed. */
+	if (coda_symlink_cache) {
+	    cp->c_symlink = str;
+	    cp->c_symlen = len;
+	    cp->c_flags |= C_SYMLINK;
+	} else
+	    CODA_FREE(str, len);
+    }
+
+    CODADEBUG(CODA_READLINK, myprintf(("in readlink result %d\n",error));)
+    return(error);
+}
+
+/*
+ * fsync a coda vnode.
+ *
+ * Returns ENODEV if the cnode is unmounting and 0 in every other case:
+ * the container file (if any) is synced with VOP_FSYNC(), but the upcall
+ * to Venus at the bottom is currently unreachable.
+ */
+int
+coda_fsync(struct vop_fsync_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct thread *td = ap->a_td;
+/* locals */
+    struct vnode *convp = cp->c_ovp;
+    int error;
+   
+    MARK_ENTRY(CODA_FSYNC_STATS);
+
+    /* Check for fsync on an unmounting object */
+    /* The NetBSD kernel, in its infinite wisdom, can try to fsync
+     * after an unmount has been initiated.  This is a Bad Thing,
+     * which we have to avoid.  Not a legitimate failure for stats.
+     */
+    if (IS_UNMOUNTING(cp)) {
+	return(ENODEV);
+    }
+
+    /* Check for fsync of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_SAT(CODA_FSYNC_STATS);
+	return(0);
+    }
+
+    /* Sync the container file; its result is not propagated. */
+    if (convp)
+    	VOP_FSYNC(convp, MNT_WAIT, td);
+
+    /*
+     * We see fsyncs with usecount == 1 then usecount == 0.
+     * For now we ignore them.
+     */
+    /*
+    VI_LOCK(vp);
+    if (!vp->v_usecount) {
+    	printf("coda_fsync on vnode %p with %d usecount.  c_flags = %x (%x)\n",
+		vp, vp->v_usecount, cp->c_flags, cp->c_flags&C_PURGING);
+    }
+    VI_UNLOCK(vp);
+    */
+
+    /*
+     * We can expect fsync on any vnode at all if venus is purging it.
+     * Venus can't very well answer the fsync request, now can it?
+     * Hopefully, it won't have to, because hopefully, venus preserves
+     * the (possibly untrue) invariant that it never purges an open
+     * vnode.  Hopefully.
+     */
+    if (cp->c_flags & C_PURGING) {
+	return(0);
+    }
+
+    /* needs research */
+    /* The unconditional return below leaves the venus_fsync() upcall dead. */
+    return 0;
+    error = venus_fsync(vtomi(vp), &cp->c_fid, td->td_proc);
+
+    CODADEBUG(CODA_FSYNC, myprintf(("in fsync result %d\n",error)); );
+    return(error);
+}
+
+/*
+ * Called when a coda vnode becomes inactive.
+ *
+ * Detaches the VM object pointer, frees any cached symlink target,
+ * removes the cnode from the cnode table and, unless the cnode is
+ * unmounting, disposes of the vnode with vgone().  Venus is not
+ * contacted.  Always returns 0; panics if the mount has no coda mount
+ * data.
+ */
+int
+coda_inactive(struct vop_inactive_args *ap)
+{
+    /* XXX - at the moment, inactive doesn't look at cred, and doesn't
+       have a proc pointer.  Oops. */
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct ucred *cred __attribute__((unused)) = NULL;
+    struct thread *td __attribute__((unused)) = curthread;
+/* upcall decl */
+/* locals */
+
+    /* We don't need to send inactive to venus - DCS */
+    MARK_ENTRY(CODA_INACTIVE_STATS);
+
+    CODADEBUG(CODA_INACTIVE, myprintf(("in inactive, %s, vfsp %p\n",
+				  coda_f2s(&cp->c_fid), vp->v_mount));)
+
+    /* Drop the VM object pointer that coda_open() copied from the container. */
+    vp->v_object = NULL;
+ 
+    /* If an array has been allocated to hold the symlink, deallocate it */
+    if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) {
+	if (cp->c_symlink == NULL)
+	    panic("coda_inactive: null symlink pointer in cnode");
+	
+	CODA_FREE(cp->c_symlink, cp->c_symlen);
+	cp->c_flags &= ~C_SYMLINK;
+	cp->c_symlen = 0;
+    }
+
+    /* Remove it from the table so it can't be found. */
+    coda_unsave(cp);
+    if ((struct coda_mntinfo *)(vp->v_mount->mnt_data) == NULL) {
+	myprintf(("Help! vfsp->vfs_data was NULL, but vnode %p wasn't dying\n", vp));
+	panic("badness in coda_inactive\n");
+    }
+
+    /* While unmounting the vnode is left alone; otherwise it is recycled now. */
+    if (IS_UNMOUNTING(cp)) {
+#ifdef	DEBUG
+	printf("coda_inactive: IS_UNMOUNTING use %d: vp %p, cp %p\n", vrefcnt(vp), vp, cp);
+	if (cp->c_ovp != NULL)
+	    printf("coda_inactive: cp->ovp != NULL use %d: vp %p, cp %p\n",
+	    	   vrefcnt(vp), vp, cp);
+#endif
+    } else {
+#ifdef OLD_DIAGNOSTIC
+	if (vrefcnt(CTOV(cp))) {
+	    panic("coda_inactive: nonzero reference count");
+	}
+	if (cp->c_ovp != NULL) {
+	    panic("coda_inactive:  cp->ovp != NULL");
+	}
+#endif
+	vgone(vp);
+    }
+
+    MARK_INT_SAT(CODA_INACTIVE_STATS);
+    return(0);
+}
+
+/*
+ * Remote filesystem operations having to do with directory manipulation.
+ */
+
+/* 
+ * It appears that in NetBSD, lookup is supposed to return the vnode locked
+ *
+ * Look up one pathname component (ap->a_cnp) in directory ap->a_dvp and
+ * return the resulting vnode in *ap->a_vpp.
+ *
+ * The control-object name is answered with coda_ctlvp; otherwise the
+ * name cache is tried first and Venus is asked on a miss.  Returns 0 on
+ * success, EINVAL for a name that is too long, EJUSTRETURN when the last
+ * component of a CREATE/RENAME lookup does not exist, or the error from
+ * venus_lookup() or VOP_UNLOCK().  On any other error *ap->a_vpp is set
+ * to NULL.
+ */
+int
+coda_lookup(struct vop_lookup_args *ap)
+{
+/* true args */
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *dcp = VTOC(dvp);
+    struct vnode **vpp = ap->a_vpp;
+    /* 
+     * It looks as though ap->a_cnp->ni_cnd->cn_nameptr holds the rest
+     * of the string to xlate, and that we must try to get at least
+     * ap->a_cnp->ni_cnd->cn_namelen of those characters to match.  I
+     * could be wrong. 
+     */
+    struct componentname  *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct thread *td = cnp->cn_thread;
+/* locals */
+    struct cnode *cp;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    CodaFid VFid;		/* fid returned by venus for the name */
+    int	vtype;			/* vnode type returned by venus */
+    int error = 0;
+
+    MARK_ENTRY(CODA_LOOKUP_STATS);
+
+    CODADEBUG(CODA_LOOKUP, myprintf(("lookup: %s in %s\n",
+				   nm, coda_f2s(&dcp->c_fid))););
+
+    /* Check for lookup of control object. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	*vpp = coda_ctlvp;
+	vref(*vpp);
+	MARK_INT_SAT(CODA_LOOKUP_STATS);
+	goto exit;
+    }
+
+    if (len+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_LOOKUP_STATS);
+
+	CODADEBUG(CODA_LOOKUP, myprintf(("name too long: lookup, %s (%s)\n",
+					 coda_f2s(&dcp->c_fid), nm)););
+	*vpp = (struct vnode *)0;
+	error = EINVAL;
+	goto exit;
+    }
+    /* First try to look the file up in the cfs name cache */
+    /* lock the parent vnode? */
+    cp = coda_nc_lookup(dcp, nm, len, cred);
+    if (cp) {
+	/* Name cache hit: hand back the cached vnode with a new reference. */
+	*vpp = CTOV(cp);
+	vref(*vpp);
+	CODADEBUG(CODA_LOOKUP, 
+		 myprintf(("lookup result %d vpp %p\n",error,*vpp));)
+    } else {
+	
+	/* The name wasn't cached, so we need to contact Venus */
+	error = venus_lookup(vtomi(dvp), &dcp->c_fid, nm, len, cred, td->td_proc, &VFid, &vtype);
+	
+	if (error) {
+	    MARK_INT_FAIL(CODA_LOOKUP_STATS);
+
+	    CODADEBUG(CODA_LOOKUP, myprintf(("lookup error on %s (%s)%d\n",
+					     coda_f2s(&dcp->c_fid), nm, error));)
+	    *vpp = (struct vnode *)0;
+	} else {
+	    MARK_INT_SAT(CODA_LOOKUP_STATS);
+	    CODADEBUG(CODA_LOOKUP, 
+		     myprintf(("lookup: %s type %o result %d\n",
+			       coda_f2s(&VFid), vtype, error)); )
+	    cp = make_coda_node(&VFid, dvp->v_mount, vtype);
+	    *vpp = CTOV(cp);
+	    
+	    /* enter the new vnode in the Name Cache only if the top bit isn't set */
+	    /* And don't enter a new vnode for an invalid one! */
+	    if (!(vtype & CODA_NOCACHE))
+		coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+	}
+    }
+
+ exit:
+    /* 
+     * If we are creating, and this was the last name to be looked up,
+     * and the error was ENOENT, then there really shouldn't be an
+     * error and we can make the leaf NULL and return success.  Since
+     * this is supposed to work under Mach as well as NetBSD, we're
+     * leaving this fn wrapped.  We also must tell lookup/namei that
+     * we need to save the last component of the name.  (Create will
+     * have to free the name buffer later...lucky us...)
+     */
+    if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME))
+	&& (cnp->cn_flags & ISLASTCN)
+	&& (error == ENOENT))
+    {
+	error = EJUSTRETURN;
+	cnp->cn_flags |= SAVENAME;
+	*ap->a_vpp = NULL;
+    }
+
+    /* 
+     * If we are removing, and we are at the last element, and we
+     * found it, then we need to keep the name around so that the
+     * removal will go ahead as planned.  Unfortunately, this will
+     * probably also lock the to-be-removed vnode, which may or may
+     * not be a good idea.  I'll have to look at the bits of
+     * coda_remove to make sure.  We'll only save the name if we did in
+     * fact find the name, otherwise coda_remove won't have a chance
+     * to free the pathname.  
+     */
+    if ((cnp->cn_nameiop == DELETE)
+	&& (cnp->cn_flags & ISLASTCN)
+	&& !error)
+    {
+	cnp->cn_flags |= SAVENAME;
+    }
+
+    /* 
+     * If the lookup went well, we need to (potentially?) unlock the
+     * parent, and lock the child.  We are only responsible for
+     * checking to see if the parent is supposed to be unlocked before
+     * we return.  We must always lock the child (provided there is
+     * one, and (the parent isn't locked or it isn't the same as the
+     * parent.)  Simple, huh?  We can never leave the parent locked unless
+     * we are ISLASTCN
+     */
+    if (!error || (error == EJUSTRETURN)) {
+	if (cnp->cn_flags & ISDOTDOT) {
+	    if ((error = VOP_UNLOCK(dvp, 0, td))) {
+		return error; 
+	    }	    
+	    /* 
+	     * The parent is unlocked.  As long as there is a child,
+	     * lock it without bothering to check anything else. 
+	     */
+	    if (*ap->a_vpp) {
+		vn_lock(*ap->a_vpp, LK_EXCLUSIVE | LK_RETRY, td);
+	    }
+	    /* Take the parent's lock back before returning. */
+	    vn_lock(dvp, LK_RETRY|LK_EXCLUSIVE, td);
+	} else {
+	    /* The parent is locked, and may be the same as the child */
+	    if (*ap->a_vpp && (*ap->a_vpp != dvp)) {
+		/* Different, go ahead and lock it. */
+		vn_lock(*ap->a_vpp, LK_EXCLUSIVE | LK_RETRY, td);
+	    }
+	}
+    } else {
+	/* If the lookup failed, we need to ensure that the leaf is NULL */
+	/* Don't change any locking? */
+	*ap->a_vpp = NULL;
+    }
+    return(error);
+}
+
+/*
+ * VOP_CREATE handler: ask Venus to create the file named by ap->a_cnp in
+ * directory ap->a_dvp and hand the new vnode back through ap->a_vpp.
+ *
+ * Returns 0 on success, EACCES when the name is the control object, or
+ * the error reported by venus_create().  On every failure *ap->a_vpp is
+ * set to a null pointer.  On success the caller's vattr (ap->a_vap) is
+ * overwritten with the attributes Venus returned, and the new vnode is
+ * locked exclusively only if LOCKLEAF is set in the component name.
+ */
+/*ARGSUSED*/
+int
+coda_create(struct vop_create_args *ap)
+{
+/* true args */
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *dcp = VTOC(dvp);
+    struct vattr *va = ap->a_vap;
+    int exclusive = 1;
+    int mode = ap->a_vap->va_mode;
+    struct vnode **vpp = ap->a_vpp;
+    struct componentname  *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct thread *td = cnp->cn_thread;
+/* locals */
+    int error;
+    struct cnode *cp;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    CodaFid VFid;
+    struct vattr attr;
+
+    MARK_ENTRY(CODA_CREATE_STATS);
+
+    /* All creates are exclusive XXX ('exclusive' is hard-wired to 1 above) */
+    /* I'm assuming the 'mode' argument is the file mode bits XXX */
+
+    /* Check for create of control object. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	*vpp = (struct vnode *)0;
+	MARK_INT_FAIL(CODA_CREATE_STATS);
+	return(EACCES);
+    }
+
+    /* Upcall: on success VFid and attr describe the newly created file. */
+    error = venus_create(vtomi(dvp), &dcp->c_fid, nm, len, exclusive, mode, va, cred, td->td_proc, &VFid, &attr);
+
+    if (!error) {
+	
+	/* If this is an exclusive create, panic if the file already exists. */
+	/* Venus should have detected the file and reported EEXIST. */
+
+	if ((exclusive == 1) &&
+	    (coda_find(&VFid) != NULL))
+	    panic("cnode existed for newly created file!");
+	
+	cp = make_coda_node(&VFid, dvp->v_mount, attr.va_type);
+	*vpp = CTOV(cp);
+	
+	/* Update va to reflect the new attributes. */
+	(*va) = attr;
+	
+	/* Update the attribute cache and mark it as valid */
+	if (coda_attr_cache) {
+	    VTOC(*vpp)->c_vattr = attr;
+	    VTOC(*vpp)->c_flags |= C_VATTR;       
+	}
+
+	/* Invalidate the parent's attr cache, the modification time has changed */
+	VTOC(dvp)->c_flags &= ~C_VATTR;
+	
+	/* enter the new vnode in the Name Cache */
+	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+	
+	CODADEBUG(CODA_CREATE, 
+		  myprintf(("create: %s, result %d\n",
+			   coda_f2s(&VFid), error)); )
+    } else {
+	*vpp = (struct vnode *)0;
+	CODADEBUG(CODA_CREATE, myprintf(("create error %d\n", error));)
+    }
+
+    /* Lock the new leaf only when the lookup that led here asked for it. */
+    if (!error) {
+	if (cnp->cn_flags & LOCKLEAF) {
+	    vn_lock(*ap->a_vpp, LK_EXCLUSIVE | LK_RETRY, td);
+	}
+#ifdef OLD_DIAGNOSTIC
+	else {
+	    printf("coda_create: LOCKLEAF not set!\n");
+	}
+#endif
+    }
+    return(error);
+}
+
+/*
+ * VOP_REMOVE handler: drop the name ap->a_cnp from directory ap->a_dvp.
+ *
+ * The minicache entry for the name (if any) and the directory's cached
+ * attributes are discarded before Venus is consulted.  Returns ENOENT
+ * for the control object, otherwise whatever venus_remove() returns.
+ */
+int
+coda_remove(struct vop_remove_args *ap)
+{
+    struct vnode *dvp = ap->a_dvp;
+    struct componentname *cnp = ap->a_cnp;
+    struct cnode *dcp = VTOC(dvp);
+    struct cnode *victim;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    int error;
+
+    MARK_ENTRY(CODA_REMOVE_STATS);
+
+    CODADEBUG(CODA_REMOVE, myprintf(("remove: %s in %s\n",
+				     nm, coda_f2s(&dcp->c_fid))););
+
+    /*
+     * Flush the name from the CODA Name Cache up front.  This is done
+     * even though the caller may turn out not to have permission to
+     * delete the file; losing a cache entry is harmless.
+     *
+     * If the cached cnode carries valid attributes and more than one
+     * link, another name still refers to the file, so its cached link
+     * count is decremented here instead of invalidating the attributes.
+     */
+    victim = coda_nc_lookup(dcp, nm, len, cnp->cn_cred);
+    if (victim != NULL) {
+	if ((VALID_VATTR(victim)) && victim->c_vattr.va_nlink > 1) {
+	    victim->c_vattr.va_nlink--;
+	}
+
+	/* Only worth zapping when the lookup actually hit. */
+	coda_nc_zapfile(dcp, nm, len);
+    }
+
+    /* The directory's modification time is about to change. */
+    dcp->c_flags &= ~C_VATTR;
+
+    /* The control object can never be removed. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	MARK_INT_FAIL(CODA_REMOVE_STATS);
+	return(ENOENT);
+    }
+
+    error = venus_remove(vtomi(dvp), &dcp->c_fid, nm, len, cnp->cn_cred,
+			 cnp->cn_thread->td_proc);
+
+    CODADEBUG(CODA_REMOVE, myprintf(("in remove result %d\n",error)); )
+
+    return(error);
+}
+
+/*
+ * VOP_LINK handler: ask Venus to create a hard link named ap->a_cnp in
+ * directory ap->a_tdvp that refers to the existing vnode ap->a_vp.
+ *
+ * Returns EACCES when the new name or the source vnode is the control
+ * object, otherwise the result of venus_link().  The cached attributes
+ * of both the target directory and the source vnode are invalidated
+ * after the upcall regardless of its outcome.
+ */
+int
+coda_link(struct vop_link_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct vnode *tdvp = ap->a_tdvp;
+    struct cnode *tdcp = VTOC(tdvp);
+    struct componentname *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct thread *td = cnp->cn_thread;
+/* locals */
+    int error;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+
+    MARK_ENTRY(CODA_LINK_STATS);
+
+    /* Dump both fids once when link debugging is enabled. */
+    if (codadebug & CODADBGMSK(CODA_LINK)) {
+	myprintf(("link:   vp fid: %s\n",
+		  coda_f2s(&cp->c_fid)));
+	myprintf(("link: tdvp fid: %s\n",
+		  coda_f2s(&tdcp->c_fid)));
+    }
+
+    /* Check for link to/from control object. */
+    if (IS_CTL_NAME(tdvp, nm, len) || IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_LINK_STATS);
+	return(EACCES);
+    }
+
+    error = venus_link(vtomi(vp), &cp->c_fid, &tdcp->c_fid, nm, len, cred, td->td_proc);
+
+    /*
+     * Invalidate the cached attributes of the directory (its modification
+     * time has changed) and of the linked vnode itself.
+     */
+    VTOC(tdvp)->c_flags &= ~C_VATTR;
+    VTOC(vp)->c_flags &= ~C_VATTR;
+
+    CODADEBUG(CODA_LINK,	myprintf(("in link result %d\n",error)); )
+
+    return(error);
+}
+
+/*
+ * VOP_RENAME handler: ask Venus to rename ap->a_fcnp in directory
+ * ap->a_fdvp to ap->a_tcnp in directory ap->a_tdvp.
+ *
+ * Name-cache entries for both names and the cached attributes of both
+ * directories are discarded before the upcall.  Returns EACCES when
+ * either name is the control object, EINVAL when either name is too
+ * long for Venus, otherwise the result of venus_rename().
+ *
+ * Every path, including the error paths, leaves through the common
+ * tail at "exit", which releases all four vnodes passed in (vrele for
+ * the source side, vput for the target side).  Nothing in this function
+ * may return before reaching it.
+ */
+int
+coda_rename(struct vop_rename_args *ap)
+{
+/* true args */
+    struct vnode *odvp = ap->a_fdvp;
+    struct cnode *odcp = VTOC(odvp);
+    struct componentname  *fcnp = ap->a_fcnp;
+    struct vnode *ndvp = ap->a_tdvp;
+    struct cnode *ndcp = VTOC(ndvp);
+    struct componentname  *tcnp = ap->a_tcnp;
+    struct ucred *cred = fcnp->cn_cred;
+    struct thread *td = fcnp->cn_thread;
+/* locals */
+    int error;
+    const char *fnm = fcnp->cn_nameptr;
+    int flen = fcnp->cn_namelen;
+    const char *tnm = tcnp->cn_nameptr;
+    int tlen = tcnp->cn_namelen;
+
+    MARK_ENTRY(CODA_RENAME_STATS);
+
+    /* Hmmm.  The vnodes are already looked up.  Perhaps they are locked?
+       This could be Bad. XXX */
+#ifdef OLD_DIAGNOSTIC
+    if ((fcnp->cn_cred != tcnp->cn_cred)
+	|| (fcnp->cn_thread != tcnp->cn_thread))
+    {
+	panic("coda_rename: component names don't agree");
+    }
+#endif
+
+    /*
+     * Check for rename involving control object.  Fail through the
+     * common tail so the vnodes are still released.
+     */
+    if (IS_CTL_NAME(odvp, fnm, flen) || IS_CTL_NAME(ndvp, tnm, tlen)) {
+	MARK_INT_FAIL(CODA_RENAME_STATS);
+	error = EACCES;
+	goto exit;
+    }
+
+    /* Problem with moving directories -- need to flush entry for .. */
+    if (odvp != ndvp) {
+	struct cnode *ovcp = coda_nc_lookup(VTOC(odvp), fnm, flen, cred);
+	if (ovcp) {
+	    struct vnode *ovp = CTOV(ovcp);
+	    if ((ovp) &&
+		(ovp->v_type == VDIR)) /* If it's a directory */
+		coda_nc_zapfile(VTOC(ovp),"..", 2);
+	}
+    }
+
+    /* Remove the entries for both source and target files */
+    coda_nc_zapfile(VTOC(odvp), fnm, flen);
+    coda_nc_zapfile(VTOC(ndvp), tnm, tlen);
+
+    /* Invalidate the parent's attr cache, the modification time has changed */
+    VTOC(odvp)->c_flags &= ~C_VATTR;
+    VTOC(ndvp)->c_flags &= ~C_VATTR;
+
+    /* Each name plus one more byte must fit in CODA_MAXNAMLEN. */
+    if (flen+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_RENAME_STATS);
+	error = EINVAL;
+	goto exit;
+    }
+
+    if (tlen+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_RENAME_STATS);
+	error = EINVAL;
+	goto exit;
+    }
+
+    error = venus_rename(vtomi(odvp), &odcp->c_fid, &ndcp->c_fid, fnm, flen, tnm, tlen, cred, td->td_proc);
+
+ exit:
+    CODADEBUG(CODA_RENAME, myprintf(("in rename result %d\n",error));)
+    /* XXX - do we need to call cache purge on the moved vnode? */
+    cache_purge(ap->a_fvp);
+
+    /* Release parents first, then children. */
+    vrele(odvp);
+    if (ap->a_tvp) {
+	/* When the target is its own directory, vput it only once. */
+	if (ap->a_tvp == ndvp)
+	    vrele(ndvp);
+	else
+	    vput(ndvp);
+	vput(ap->a_tvp);
+    } else
+	vput(ndvp);
+    vrele(ap->a_fvp);
+
+    return(error);
+}
+
+/*
+ * VOP_MKDIR handler: ask Venus to create directory ap->a_cnp inside
+ * ap->a_dvp and return its vnode, locked exclusively, through ap->a_vpp.
+ *
+ * Returns EACCES when the name is the control object or when len+1
+ * exceeds CODA_MAXNAMLEN, otherwise the result of venus_mkdir().  On
+ * every failure *ap->a_vpp is set to a null pointer.
+ *
+ * NOTE(review): coda_rename and coda_symlink report EINVAL for the same
+ * over-length condition; confirm that EACCES is intended here.
+ */
+int
+coda_mkdir(struct vop_mkdir_args *ap)
+{
+/* true args */
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *dcp = VTOC(dvp);	
+    struct componentname  *cnp = ap->a_cnp;
+    register struct vattr *va = ap->a_vap;
+    struct vnode **vpp = ap->a_vpp;
+    struct ucred *cred = cnp->cn_cred;
+    struct thread *td = cnp->cn_thread;
+/* locals */
+    int error;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    struct cnode *cp;
+    CodaFid VFid;
+    struct vattr ova;
+
+    MARK_ENTRY(CODA_MKDIR_STATS);
+
+    /* Check for mkdir of control object. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	*vpp = (struct vnode *)0;
+	MARK_INT_FAIL(CODA_MKDIR_STATS);
+	return(EACCES);
+    }
+
+    /* The name plus one more byte must fit in CODA_MAXNAMLEN. */
+    if (len+1 > CODA_MAXNAMLEN) {
+	*vpp = (struct vnode *)0;
+	MARK_INT_FAIL(CODA_MKDIR_STATS);
+	return(EACCES);
+    }
+
+    /* Upcall: on success VFid and ova describe the new directory. */
+    error = venus_mkdir(vtomi(dvp), &dcp->c_fid, nm, len, va, cred, td->td_proc, &VFid, &ova);
+
+    if (!error) {
+	if (coda_find(&VFid) != NULL)
+	    panic("cnode existed for newly created directory!");
+	
+	
+	/* The vnode type comes from the caller's vattr, not from ova. */
+	cp =  make_coda_node(&VFid, dvp->v_mount, va->va_type);
+	*vpp = CTOV(cp);
+	
+	/* enter the new vnode in the Name Cache */
+	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+
+	/* as a side effect, enter "." and ".." for the directory */
+	coda_nc_enter(VTOC(*vpp), ".", 1, cred, VTOC(*vpp));
+	coda_nc_enter(VTOC(*vpp), "..", 2, cred, VTOC(dvp));
+
+	if (coda_attr_cache) {
+	    VTOC(*vpp)->c_vattr = ova;		/* update the attr cache */
+	    VTOC(*vpp)->c_flags |= C_VATTR;	/* Valid attributes in cnode */
+	}
+
+	/* Invalidate the parent's attr cache, the modification time has changed */
+	VTOC(dvp)->c_flags &= ~C_VATTR;
+
+	/* Unlike coda_create, the new vnode is locked unconditionally. */
+	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);
+
+	CODADEBUG( CODA_MKDIR, myprintf(("mkdir: %s result %d\n",
+					 coda_f2s(&VFid), error)); )
+	} else {
+	*vpp = (struct vnode *)0;
+	CODADEBUG(CODA_MKDIR, myprintf(("mkdir error %d\n",error));)
+    }
+
+    return(error);
+}
+
+/*
+ * VOP_RMDIR handler: ask Venus to remove the directory named by
+ * ap->a_cnp from ap->a_dvp.
+ *
+ * Returns ENOENT for the control object, otherwise the result of
+ * venus_rmdir().  All minicache state that could refer to the doomed
+ * directory is dropped before the upcall is made.
+ */
+int
+coda_rmdir(struct vop_rmdir_args *ap)
+{
+    struct vnode *dvp = ap->a_dvp;
+    struct componentname *cnp = ap->a_cnp;
+    struct cnode *dcp = VTOC(dvp);
+    struct cnode *child;
+    struct ucred *cred = cnp->cn_cred;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    int error;
+
+    MARK_ENTRY(CODA_RMDIR_STATS);
+
+    /* The control object can never be removed. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	MARK_INT_FAIL(CODA_RMDIR_STATS);
+	return(ENOENT);
+    }
+
+    /*
+     * Be conservative: the caller may not actually be allowed to delete
+     * the directory, but dropping cache entries hurts nobody.  If the
+     * directory is in the name cache, first zap every entry recorded
+     * under its fid (notably "." and ".."), then zap its own entry in
+     * the parent.
+     */
+    child = coda_nc_lookup(dcp, nm, len, cred);
+    if (child != NULL) {
+	coda_nc_zapParentfid(&(child->c_fid), NOT_DOWNCALL);
+    }
+    coda_nc_zapfile(dcp, nm, len);
+
+    /* The parent's modification time is about to change. */
+    dcp->c_flags &= ~C_VATTR;
+
+    error = venus_rmdir(vtomi(dvp), &dcp->c_fid, nm, len, cred,
+			cnp->cn_thread->td_proc);
+
+    CODADEBUG(CODA_RMDIR, myprintf(("in rmdir result %d\n", error)); )
+
+    return(error);
+}
+
+/*
+ * VOP_SYMLINK handler: ask Venus to create a symbolic link named by
+ * ap->a_cnp in directory ap->a_dvp whose contents are ap->a_target, then
+ * look the new name up to obtain its vnode in ap->a_vpp.
+ *
+ * Returns EACCES when the name is the control object, EINVAL when the
+ * target path or the link name is too long, otherwise the result of
+ * venus_symlink() or, if that succeeded, of the follow-up VOP_LOOKUP().
+ */
+int
+coda_symlink(struct vop_symlink_args *ap)
+{
+/* true args */
+    struct vnode *tdvp = ap->a_dvp;
+    struct cnode *tdcp = VTOC(tdvp);	
+    struct componentname *cnp = ap->a_cnp;
+    struct vattr *tva = ap->a_vap;
+    char *path = ap->a_target;
+    struct ucred *cred = cnp->cn_cred;
+    struct thread *td = cnp->cn_thread;
+    struct vnode **vpp = ap->a_vpp;
+/* locals */
+    int error;
+    /* 
+     * XXX I'm assuming the following things about coda_symlink's
+     * arguments: 
+     *       t(foo) is the new name/parent/etc being created.
+     *       path is the contents of the new symlink. 
+     */
+    char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    int plen = strlen(path);
+
+    /* 
+     * Here's the strategy for the moment: perform the symlink, then
+     * do a lookup to grab the resulting vnode.  I know this requires
+     * two communications with Venus for a new symbolic link, but
+     * that's the way the ball bounces.  I don't yet want to change
+     * the way the Mach symlink works.  When Mach support is
+     * deprecated, we should change symlink so that the common case
+     * returns the resultant vnode in a vpp argument.
+     */
+
+    MARK_ENTRY(CODA_SYMLINK_STATS);
+
+    /* Check for symlink of control object. */
+    if (IS_CTL_NAME(tdvp, nm, len)) {
+	MARK_INT_FAIL(CODA_SYMLINK_STATS);
+	return(EACCES);
+    }
+
+    /* The link contents plus one more byte must fit in CODA_MAXPATHLEN. */
+    if (plen+1 > CODA_MAXPATHLEN) {
+	MARK_INT_FAIL(CODA_SYMLINK_STATS);
+	return(EINVAL);
+    }
+
+    /* Likewise the link name must fit in CODA_MAXNAMLEN. */
+    if (len+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_SYMLINK_STATS);
+	error = EINVAL;
+	goto exit;
+    }
+
+    error = venus_symlink(vtomi(tdvp), &tdcp->c_fid, path, plen, nm, len, tva, cred, td->td_proc);
+
+    /* Invalidate the parent's attr cache, the modification time has changed */
+    tdcp->c_flags &= ~C_VATTR;
+
+    /* Second round trip: fetch the vnode for the link just created. */
+    if (error == 0)
+	error = VOP_LOOKUP(tdvp, vpp, cnp);
+
+ exit:    
+    CODADEBUG(CODA_SYMLINK, myprintf(("in symlink result %d\n",error)); )
+    return(error);
+}
+
+/*
+ * Read directory entries.
+ *
+ * VOP_READDIR handler.  The request is forwarded to VOP_READDIR on the
+ * container vnode cp->c_ovp.  If the directory has no container vnode
+ * yet, it is opened with VOP_OPEN for the duration of the call and
+ * closed again afterwards.
+ *
+ * Returns ENOENT for the control object, the VOP_OPEN error if the
+ * internal open fails, otherwise the result of the forwarded
+ * VOP_READDIR.
+ */
+int
+coda_readdir(struct vop_readdir_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    register struct uio *uiop = ap->a_uio;
+    struct ucred *cred = ap->a_cred;
+    int *eofflag = ap->a_eofflag;
+    u_long **cookies = ap->a_cookies;
+    int *ncookies = ap->a_ncookies;
+    struct thread *td = ap->a_uio->uio_td;
+/* upcall decl */
+/* locals */
+    int error = 0;
+
+    MARK_ENTRY(CODA_READDIR_STATS);
+
+    CODADEBUG(CODA_READDIR, myprintf(("coda_readdir(%p, %d, %lld, %d)\n",
+				      (void *)uiop->uio_iov->iov_base,
+				      uiop->uio_resid,
+				      (long long)uiop->uio_offset,
+				      uiop->uio_segflg)); )
+	
+    /* Check for readdir of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_READDIR_STATS);
+	return(ENOENT);
+    }
+
+    {
+	/* If directory is not already open do an "internal open" on it. */
+	int opened_internally = 0;
+	if (cp->c_ovp == NULL) {
+	    opened_internally = 1;
+	    MARK_INT_GEN(CODA_OPEN_STATS);
+	    error = VOP_OPEN(vp, FREAD, cred, td, NULL);
+	    /*
+	     * NOTE(review): this message is printed on every internal
+	     * open, success or failure, and is not gated by CODADEBUG.
+	     */
+	    printf("coda_readdir: Internally Opening %p\n", vp);
+	    if (error) {
+		printf("coda_readdir: VOP_OPEN on container failed %d\n", error);
+		return (error);
+	    }
+	}
+	
+	/* Have UFS handle the call. */
+	CODADEBUG(CODA_READDIR, myprintf(("indirect readdir: fid = %s, refcnt = %d\n", coda_f2s(&cp->c_fid), vp->v_usecount)); )
+	error = VOP_READDIR(cp->c_ovp, uiop, cred, eofflag, ncookies,
+			       cookies);
+	
+	if (error)
+	    MARK_INT_FAIL(CODA_READDIR_STATS);
+	else
+	    MARK_INT_SAT(CODA_READDIR_STATS);
+	
+	/* Do an "internal close" if necessary; its result is ignored. */ 
+	if (opened_internally) {
+	    MARK_INT_GEN(CODA_CLOSE_STATS);
+	    (void)VOP_CLOSE(vp, FREAD, cred, td);
+	}
+    }
+
+    return(error);
+}
+
+/*
+ * Convert from filesystem blocks to device blocks
+ *
+ * VOP_BMAP handler.  No mapping is ever produced and none of the output
+ * fields in *ap are written.  The request is not passed through to the
+ * container vnode: when one is attached (cp->c_ovp is set) the call
+ * fails with EINVAL, and when none is attached it fails with EOPNOTSUPP.
+ */
+int
+coda_bmap(struct vop_bmap_args *ap)
+{
+	struct cnode *cp = VTOC(ap->a_vp);
+
+	if (cp->c_ovp)
+		return (EINVAL);
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * VOP_RECLAIM handler: detach and free the cnode behind ap->a_vp.
+ *
+ * Purges the vnode from the name cache, frees its cnode with
+ * coda_free(), and clears v_data and v_object.  Always returns 0.
+ * The checks before that are diagnostics only and are compiled in
+ * solely under DEBUG / OLD_DIAGNOSTIC.
+ */
+int
+coda_reclaim(struct vop_reclaim_args *ap)
+{
+/* true args */
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+
+    ENTRY;
+
+    /*
+     * Forced unmount/flush will let vnodes with non zero use be destroyed!
+     */
+    if (IS_UNMOUNTING(cp)) {
+#ifdef	DEBUG
+	/* A container vnode still attached during unmount is only reported. */
+	if (cp->c_ovp)
+	    printf("coda_reclaim: c_ovp not void: vp %p, cp %p\n", vp, cp);
+#endif
+    } else {
+#ifdef OLD_DIAGNOSTIC
+	if (vrefcnt(vp) != 0)
+	    printf("coda_reclaim: pushing active %p\n", vp);
+	/* Outside of unmount a leftover container vnode is fatal. */
+	if (cp->c_ovp)
+	    panic("coda_reclaim: c_ovp not void");
+#endif
+    }
+    cache_purge(vp);
+    coda_free(cp);
+    vp->v_data = NULL;
+    vp->v_object = NULL;
+    return (0);
+}
+
+/*
+ * VOP_LOCK1 handler: a thin wrapper around vop_stdlock().
+ *
+ * If the caller did not pass LK_INTERLOCK, the vnode interlock is taken
+ * here and the flag is set in ap->a_flags, so vop_stdlock() is always
+ * called with LK_INTERLOCK set.  Returns whatever vop_stdlock() returns.
+ */
+int
+coda_lock(struct vop_lock1_args *ap)
+{
+    struct vnode *vp = ap->a_vp;
+    struct cnode *node = VTOC(vp);
+
+    ENTRY;
+
+    if (!(ap->a_flags & LK_INTERLOCK)) {
+	VI_LOCK(vp);
+	ap->a_flags |= LK_INTERLOCK;
+    }
+
+    /* Optional trace of lock attempts, keyed by the cnode's fid. */
+    if (coda_lockdebug) {
+	myprintf(("Attempting lock on %s\n", coda_f2s(&node->c_fid)));
+    }
+
+    return (vop_stdlock(ap));
+}
+
+/*
+ * VOP_UNLOCK handler: optionally trace the attempt, then defer to
+ * vop_stdunlock() and return its result.
+ */
+int
+coda_unlock(struct vop_unlock_args *ap)
+{
+    struct vnode *vp = ap->a_vp;
+    struct cnode *node = VTOC(vp);
+
+    ENTRY;
+
+    /* Optional trace of unlock attempts, keyed by the cnode's fid. */
+    if (coda_lockdebug) {
+	myprintf(("Attempting unlock on %s\n", coda_f2s(&node->c_fid)));
+    }
+
+    return (vop_stdunlock(ap));
+}
+
+/*
+ * VOP_ISLOCKED handler: report the lock state as computed by
+ * vop_stdislocked().
+ */
+int
+coda_islocked(struct vop_islocked_args *ap)
+{
+    int state;
+
+    ENTRY;
+
+    state = vop_stdislocked(ap);
+    return (state);
+}
+
+/*
+ * Debug helper: dump the fields of *attr through myprintf().
+ *
+ * The vnode type is shown as a four-character tag ("????" for a type
+ * not listed below).  Most numeric fields are cast to int for printing.
+ */
+void
+print_vattr(struct vattr *attr)
+{
+    char *typestr;
+
+    /* Map the vnode type onto its printable tag. */
+    switch (attr->va_type) {
+    case VNON:	typestr = "VNON"; break;
+    case VREG:	typestr = "VREG"; break;
+    case VDIR:	typestr = "VDIR"; break;
+    case VBLK:	typestr = "VBLK"; break;
+    case VCHR:	typestr = "VCHR"; break;
+    case VLNK:	typestr = "VLNK"; break;
+    case VSOCK:	typestr = "VSCK"; break;
+    case VFIFO:	typestr = "VFFO"; break;
+    case VBAD:	typestr = "VBAD"; break;
+    default:	typestr = "????"; break;
+    }
+
+    /* Identity and ownership. */
+    myprintf(("attr: type %s mode %d uid %d gid %d fsid %d rdev %d\n",
+	      typestr, (int)attr->va_mode, (int)attr->va_uid,
+	      (int)attr->va_gid, (int)attr->va_fsid, (int)attr->va_rdev));
+
+    /* Link count and sizes. */
+    myprintf(("      fileid %d nlink %d size %d blocksize %d bytes %d\n",
+	      (int)attr->va_fileid, (int)attr->va_nlink,
+	      (int)attr->va_size,
+	      (int)attr->va_blocksize,(int)attr->va_bytes));
+
+    /* Generation number and flag words. */
+    myprintf(("      gen %ld flags %ld vaflags %d\n",
+	      attr->va_gen, attr->va_flags, attr->va_vaflags));
+
+    /* The three timestamps. */
+    myprintf(("      atime sec %d nsec %d\n",
+	      (int)attr->va_atime.tv_sec, (int)attr->va_atime.tv_nsec));
+    myprintf(("      mtime sec %d nsec %d\n",
+	      (int)attr->va_mtime.tv_sec, (int)attr->va_mtime.tv_nsec));
+    myprintf(("      ctime sec %d nsec %d\n",
+	      (int)attr->va_ctime.tv_sec, (int)attr->va_ctime.tv_nsec));
+}
+
+/*
+ * Debug helper: dump a ucred through myprintf() -- its reference count
+ * and uid on one line, then one line per entry of cr_groups, then a
+ * blank line.
+ */
+void
+print_cred(struct ucred *cred)
+{
+	int idx = 0;
+
+	myprintf(("ref %d\tuid %d\n",cred->cr_ref,cred->cr_uid));
+
+	while (idx < cred->cr_ngroups) {
+		myprintf(("\tgroup %d: (%d)\n",idx,cred->cr_groups[idx]));
+		idx++;
+	}
+	myprintf(("\n"));
+}
+
+/*
+ * Return the cnode for the given fid.
+ *
+ * If coda_find() knows no cnode for this fid, a new cnode and a new
+ * vnode are allocated, tied to each other, given the requested vnode
+ * type, and the cnode is recorded with coda_save().  If the cnode is
+ * already known, its vnode gains a reference via vref() and the
+ * existing cnode is returned.
+ *
+ * The cnode will be flushed from the table when coda_inactive calls
+ * coda_unsave.
+ *
+ * Failure of getnewvnode() or insmntque1() is fatal: the function
+ * panics instead of returning an error.
+ */
+struct cnode *
+make_coda_node(CodaFid *fid, struct mount *vfsp, short type)
+{
+    struct cnode *cp;
+    int          err;
+
+    if ((cp = coda_find(fid)) == NULL) {
+	struct vnode *vp;
+	
+	cp = coda_alloc();
+	cp->c_fid = *fid;
+	
+	err = getnewvnode("coda", vfsp, &coda_vnodeops, &vp);  
+	if (err) {                                                
+	    panic("coda: getnewvnode returned error %d\n", err);   
+	}                                                         
+	err = insmntque1(vp, vfsp, NULL, NULL);	/* XXX: Too early for mpsafe fs */
+	if (err != 0)
+		panic("coda: insmntque failed: error %d", err);
+	/* Cross-link vnode and cnode, then record the cnode. */
+	vp->v_data = cp;                                          
+	vp->v_type = type;                                      
+	cp->c_vnode = vp;                                         
+	coda_save(cp);
+	
+    } else {
+	/* Known cnode: take another reference on its vnode. */
+	vref(CTOV(cp));
+    }
+
+    return cp;
+}
+
+/*
+ * VOP_PATHCONF handler.
+ *
+ * Answers _PC_NAME_MAX with CODA_MAXNAMLEN and _PC_PATH_MAX with
+ * CODA_MAXPATHLEN, storing the value in *ap->a_retval and returning 0.
+ * Every other query is passed to vop_stdpathconf() and its result is
+ * returned.
+ */
+int
+coda_pathconf(struct vop_pathconf_args *ap)
+{
+	register_t *out = ap->a_retval;
+
+	if (ap->a_name == _PC_NAME_MAX) {
+		*out = CODA_MAXNAMLEN;
+		return (0);
+	}
+
+	if (ap->a_name == _PC_PATH_MAX) {
+		*out = CODA_MAXPATHLEN;
+		return (0);
+	}
+
+	return (vop_stdpathconf(ap));
+}
--- /dev/null
+++ sys/fs/coda/coda_opstats.h
@@ -0,0 +1,127 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_opstats.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_opstats.h,v 1.7 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+/*
+ * operation stats: what the minicache can intercept that
+ * *isn't* seen by venus.  These stats are kept to augment
+ * the stats maintained by the Volume-Session mechanism.
+ */
+
+/* vfsops:
+ *          mount: not currently bounced to Venus
+ *          umount: nope
+ *          root: only first call, rest is cached.
+ *          statfs: none (bogus)
+ *          sync: none (bogus)
+ *          vget: all
+ */
+
+/* One slot number per vfs operation, numbered consecutively from 0. */
+#define CODA_MOUNT_STATS  0
+#define CODA_UMOUNT_STATS 1
+#define CODA_ROOT_STATS   2
+#define CODA_STATFS_STATS 3
+#define CODA_SYNC_STATS   4
+#define CODA_VGET_STATS   5
+#define CODA_VFSOPS_SIZE  6	/* number of vfsop slots defined above */
+
+/* vnodeops:
+ *            open: all to venus
+ *            close: all to venus
+ *            rdrw: bogus.  Maybe redirected to UFS.
+ *                          May call open/close for internal opens/closes
+ *                          (Does exec not call open?)
+ *            ioctl: causes a lookupname
+ *                   passes through
+ *            select: can't get there from here.
+ *            getattr: can be satisfied by cache
+ *            setattr: all go through
+ *            access: can be satisfied by cache
+ *            readlink: can be satisfied by cache
+ *            fsync: passes through
+ *            inactive: passes through
+ *            lookup: can be satisfied by cache
+ *            create: passes through
+ *            remove: passes through
+ *            link: passes through
+ *            rename: passes through
+ *            mkdir: passes through
+ *            rmdir: passes through
+ *            symlink: passes through
+ *            readdir: may be redirected to UFS
+ *                     may cause an "internal" open/close
+ */
+
+/* One slot number per vnode operation, numbered consecutively from 0. */
+#define CODA_OPEN_STATS     0
+#define CODA_CLOSE_STATS    1
+#define CODA_RDWR_STATS     2
+#define CODA_IOCTL_STATS    3
+#define CODA_SELECT_STATS   4
+#define CODA_GETATTR_STATS  5
+#define CODA_SETATTR_STATS  6
+#define CODA_ACCESS_STATS   7
+#define CODA_READLINK_STATS 8
+#define CODA_FSYNC_STATS    9
+#define CODA_INACTIVE_STATS 10
+#define CODA_LOOKUP_STATS   11
+#define CODA_CREATE_STATS   12
+#define CODA_REMOVE_STATS   13
+#define CODA_LINK_STATS     14
+#define CODA_RENAME_STATS   15
+#define CODA_MKDIR_STATS    16
+#define CODA_RMDIR_STATS    17
+#define CODA_SYMLINK_STATS  18
+#define CODA_READDIR_STATS  19
+#define CODA_VNODEOPS_SIZE  20	/* number of vnodeop slots defined above */
+
+/*
+ * I propose the following structures:
+ *
+ * struct coda_op_stats holds the counters kept for one operation.
+ * NOTE(review): presumably one instance exists per *_STATS slot defined
+ * above -- confirm against the code that declares the stats tables.
+ */
+
+struct coda_op_stats {
+    int opcode;       /* vfs opcode */
+    long entries;     /* number of times call attempted */
+    long sat_intrn;   /* number of times call satisfied by cache */
+    long unsat_intrn; /* number of times call failed in cache, but
+                         was not bounced to venus proper. */
+    long gen_intrn;   /* number of times call generated internally */
+                      /* (do we need that?) */
+};
+
+/*
+ * With each call to the minicache, we'll bump the counters whenever
+ * a call is satisfied internally (through the cache or through a
+ * redirect), and whenever an operation is caused internally.
+ * Then, we can add the total operations caught by the minicache
+ * to the world-wide totals, and leave a caveat for the specific
+ * graphs later.
+ */
--- /dev/null
+++ sys/fs/coda/coda_subr.h
@@ -0,0 +1,45 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_subr.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda_subr.h,v 1.10 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+/*
+ * cnode bookkeeping helpers.  In coda_vnops.c, make_coda_node() uses
+ * coda_find(), coda_alloc() and coda_save() to locate or create a cnode,
+ * and coda_reclaim() uses coda_free() to release one.
+ */
+struct cnode *coda_alloc(void);
+void  coda_free(struct cnode *cp);
+struct cnode *coda_find(CodaFid *fid);
+void coda_flush(enum dc_status dcstat);
+void coda_testflush(void);
+void coda_checkunmounting(struct mount *mp);
+void coda_cacheprint(struct mount *whoIam);
+void coda_debugon(void);
+void coda_debugoff(void);
+int  coda_kill(struct mount *whoIam, enum dc_status dcstat);
+void coda_save(struct cnode *cp);
+void coda_unsave(struct cnode *cp);
--- /dev/null
+++ sys/fs/coda/coda_vnops.h
@@ -0,0 +1,86 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_vnops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $FreeBSD: src/sys/fs/coda/coda_vnops.h,v 1.19 2007/07/12 20:40:38 rwatson Exp $
+ * 
+  */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda filesystem at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  
+ */
+
+
+/*
+ * NetBSD interfaces to the vnodeops
+ *
+ * Each handler is declared through the matching vop_*_t function
+ * typedef.
+ */
+vop_open_t coda_open;
+vop_close_t coda_close;
+vop_read_t coda_read;
+vop_write_t coda_write;
+vop_ioctl_t coda_ioctl;
+/* 1.3 int cfs_select(void *);*/
+vop_getattr_t coda_getattr;
+vop_setattr_t coda_setattr;
+vop_access_t coda_access;
+int coda_abortop(void *);
+vop_readlink_t coda_readlink;
+vop_fsync_t coda_fsync;
+vop_inactive_t coda_inactive;
+vop_lookup_t coda_lookup;
+vop_create_t coda_create;
+vop_remove_t coda_remove;
+vop_link_t coda_link;
+vop_rename_t coda_rename;
+vop_mkdir_t coda_mkdir;
+vop_rmdir_t coda_rmdir;
+vop_symlink_t coda_symlink;
+vop_readdir_t coda_readdir;
+vop_bmap_t coda_bmap;
+vop_strategy_t coda_strategy;
+vop_reclaim_t coda_reclaim;
+vop_lock1_t coda_lock;
+vop_unlock_t coda_unlock;
+vop_islocked_t coda_islocked;
+int coda_vop_error(void *);
+int coda_vop_nop(void *);
+vop_pathconf_t coda_pathconf;
+
+/* Other helpers, declared with explicit prototypes. */
+int coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw,
+    int ioflag, struct ucred *cred, struct thread *td);
+int coda_grab_vnode(struct cdev *dev, ino_t ino, struct vnode **vpp);
+/* Debug dump routines for a vattr and a ucred. */
+void print_vattr(struct vattr *attr);
+void print_cred(struct ucred *cred);
--- /dev/null
+++ sys/fs/coda/coda.h
@@ -0,0 +1,830 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ * $FreeBSD: src/sys/fs/coda/coda.h,v 1.16 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+
+/*
+ *
+ * Based on cfs.h from Mach, but revamped for increased simplicity.
+ * Linux modifications by Peter Braam, Aug 1996
+ */
+
+#ifndef _CODA_HEADER_
+#define _CODA_HEADER_
+
+#include "opt_coda.h"	/* for CODA_COMPAT_5 option */
+
+/* Avoid CODA_COMPAT_5 redefinition in coda5 module */
+#if defined (CODA5_MODULE) && !defined(CODA_COMPAT_5)
+#define CODA_COMPAT_5
+#endif
+
+/* Catch new _KERNEL defn for NetBSD */
+#ifdef __NetBSD__
+#include <sys/types.h>
+#endif 
+
+#ifndef CODA_MAXSYMLINKS
+#define CODA_MAXSYMLINKS 10
+#endif
+
+#if defined(DJGPP) || defined(__CYGWIN32__)	/* basic type definitions for DJGPP/Cygwin builds */
+#ifdef _KERNEL
+typedef unsigned long u_long;
+typedef unsigned int u_int;
+typedef unsigned short u_short;
+typedef u_long ino_t;
+typedef u_long dev_t;	/* was "struct cdev *", which is not a valid typedef name; cdev_t is defined as dev_t below */
+typedef void * caddr_t;
+#ifdef DOS
+typedef unsigned __int64 u_quad_t;
+#else
+typedef unsigned long long u_quad_t;
+#endif
+/* No inline keyword is assumed in this configuration, so it is defined away. */
+#define inline
+
+struct timespec {
+        long       ts_sec;
+        long       ts_nsec;
+};
+#else  /* DJGPP or Cygwin, but not _KERNEL */
+#include <sys/types.h>
+#include <sys/time.h>
+typedef unsigned long long u_quad_t;
+#endif /* _KERNEL */
+#endif /* DJGPP || __CYGWIN32__ */
+
+
+#if defined(__linux__)
+#define cdev_t u_quad_t
+#if !defined(_UQUAD_T_) && (!defined(__GLIBC__) || __GLIBC__ < 2)
+#define _UQUAD_T_ 1
+typedef unsigned long long u_quad_t;
+#endif
+#else
+#define cdev_t dev_t
+#endif
+
+#ifdef __CYGWIN32__
+typedef unsigned char u_int8_t;
+struct timespec {
+        time_t  tv_sec;         /* seconds */
+        long    tv_nsec;        /* nanoseconds */
+};
+#endif
+
+
+/*
+ * Cfs constants
+ */
+#define CODA_MAXNAMLEN   255
+#define CODA_MAXPATHLEN  1024
+#define CODA_MAXSYMLINK  10
+
+/* these are Coda's version of O_RDONLY etc combinations
+ * to deal with VFS open modes
+ */
+#define	C_O_READ	0x001
+#define	C_O_WRITE       0x002
+#define C_O_TRUNC       0x010
+#define C_O_EXCL	0x100
+#define C_O_CREAT	0x200
+
+/* these are to find mode bits in Venus */ 
+#define C_M_READ  00400
+#define C_M_WRITE 00200
+
+/* for access Venus will use */
+#define C_A_C_OK    8               /* Test for writing upon create.  */
+#define C_A_R_OK    4               /* Test for read permission.  */
+#define C_A_W_OK    2               /* Test for write permission.  */
+#define C_A_X_OK    1               /* Test for execute permission.  */
+#define C_A_F_OK    0               /* Test for existence.  */
+
+
+
+#ifndef _VENUS_DIRENT_T_
+#define _VENUS_DIRENT_T_ 1
+struct venus_dirent {	/* directory entry record; DIRSIZ() below computes the space one entry occupies */
+        unsigned long	d_fileno;		/* file number of entry */
+        unsigned short	d_reclen;		/* length of this record */
+        char 		d_type;			/* file type, see below */
+        char		d_namlen;		/* length of string in d_name; NOTE(review): plain char cannot hold CODA_MAXNAMLEN (255) where char is signed -- confirm */
+        char		d_name[CODA_MAXNAMLEN + 1];/* name must be no longer than this */
+};
+#undef DIRSIZ	/* drop any earlier DIRSIZ; the one below is the struct size minus the full name array, plus d_namlen+1 rounded up to a multiple of 4 */
+#define DIRSIZ(dp)      ((sizeof (struct venus_dirent) - (CODA_MAXNAMLEN+1)) + \
+                         (((dp)->d_namlen+1 + 3) &~ 3))
+
+/*
+ * File types
+ */
+#define	CDT_UNKNOWN	 0
+#define	CDT_FIFO	 1
+#define	CDT_CHR		 2
+#define	CDT_DIR		 4
+#define	CDT_BLK		 6
+#define	CDT_REG		 8
+#define	CDT_LNK		10
+#define	CDT_SOCK	12
+#define	CDT_WHT		14
+
+/*
+ * Convert between stat structure types and directory types.
+ */
+#define	IFTOCDT(mode)	(((mode) & 0170000) >> 12)
+#define	CDTTOIF(dirtype)	((dirtype) << 12)
+
+#endif
+
+#ifdef CODA_COMPAT_5
+
+typedef struct {
+    u_long Volume;
+    u_long Vnode;
+    u_long Unique;
+} CodaFid;
+/* Fold a fid into an inode number: Unique + (Vnode << 10) + (Volume << 20); a NULL fid yields 0. */
+static __inline__ ino_t coda_f2i(CodaFid *fid)
+{
+	if (!fid) return 0;
+	return (fid->Unique + (fid->Vnode<<10) + (fid->Volume<<20));
+}
+/* Format a fid as "[Volume.Vnode.Unique]" in hex.  Returns a static buffer that each call overwrites. */
+static __inline__ char * coda_f2s(CodaFid *fid)
+{
+  static char fid_str[3 * 2 * sizeof(u_long) + 5];	/* "[" + 3 full-width hex fields + 2 "." + "]" + NUL; the old 35 bytes truncated 64-bit u_long */
+  snprintf (fid_str, sizeof(fid_str), "[%lx.%lx.%lx]", fid->Volume,
+	    fid->Vnode, fid->Unique);
+  return fid_str;
+}
+/* Return non-zero when both fids have the same Volume, Vnode and Unique. */
+static __inline__ int coda_fid_eq (CodaFid *fid1, CodaFid *fid2)
+{
+  return (fid1->Volume == fid2->Volume &&
+	  fid1->Vnode == fid2->Vnode &&
+	  fid1->Unique == fid2->Unique);
+}
+  
+struct coda_cred {
+    u_int32_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
+    u_int32_t cr_groupid,     cr_egid, cr_sgid, cr_fsgid; /* same for groups */
+};
+
+#else	/* CODA_COMPAT_5 */
+
+typedef struct  {
+	u_int32_t opaque[4];
+} CodaFid;
+/* Fold a fid into an inode number by XOR-ing its four words (two of them shifted); a NULL fid yields 0. */
+static __inline__ ino_t  coda_f2i(CodaFid *fid)
+{
+    if ( ! fid )
+	return 0;
+    return (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]);
+}
+/* Format a fid as "[w0.w1.w2.w3]" in hex.  Returns a static buffer that each call overwrites. */
+static __inline__ char * coda_f2s(CodaFid *fid)
+{
+     static char fid_str[4 * 2 * sizeof(u_int32_t) + 6];	/* "[" + 4*8 hex digits + 3 "." + "]" + NUL = 38; the old 35 bytes truncated full-width fids */
+     snprintf (fid_str, sizeof(fid_str), "[%x.%x.%x.%x]", fid->opaque[0],
+	       fid->opaque[1], fid->opaque[2], fid->opaque[3]);
+     return fid_str;
+}
+/* Return non-zero when all four words of the two fids match. */
+static __inline__ int coda_fid_eq (CodaFid *fid1, CodaFid *fid2)
+{
+  return (fid1->opaque[0] == fid2->opaque[0] &&
+	  fid1->opaque[1] == fid2->opaque[1] &&
+	  fid1->opaque[2] == fid2->opaque[2] &&
+	  fid1->opaque[3] == fid2->opaque[3]);
+}
+
+#endif	/* CODA_COMPAT_5 */
+
+#ifndef _VENUS_VATTR_T_
+#define _VENUS_VATTR_T_
+/*
+ * Vnode types.  VNON means no type.
+ */
+enum coda_vtype	{ C_VNON, C_VREG, C_VDIR, C_VBLK, C_VCHR, C_VLNK, C_VSOCK, C_VFIFO, C_VBAD };
+
+struct coda_vattr {
+	int     	va_type;	/* vnode type (for create) */
+	u_short		va_mode;	/* files access mode and type */
+	short		va_nlink;	/* number of references to file */
+	uid_t		va_uid;		/* owner user id */
+	gid_t		va_gid;		/* owner group id */
+	long		va_fileid;	/* file id */
+	u_quad_t	va_size;	/* file size in bytes */
+	long		va_blocksize;	/* blocksize preferred for i/o */
+	struct timespec	va_atime;	/* time of last access */
+	struct timespec	va_mtime;	/* time of last modification */
+	struct timespec	va_ctime;	/* time file changed */
+	u_long		va_gen;		/* generation number of file */
+	u_long		va_flags;	/* flags defined for file */
+	cdev_t	        va_rdev;	/* device special file represents */
+	u_quad_t	va_bytes;	/* bytes of disk space held by file */
+	u_quad_t	va_filerev;	/* file modification number */
+};
+
+#endif 
+
+/* structure used by CODA_STATFS for getting cache information from venus */
+struct coda_statfs {
+    int32_t f_blocks;
+    int32_t f_bfree;
+    int32_t f_bavail;
+    int32_t f_files;
+    int32_t f_ffree;
+};
+
+/*
+ * Kernel <--> Venus communications.
+ */
+
+#define CODA_ROOT	2
+#define CODA_OPEN_BY_FD	3
+#define CODA_OPEN	4
+#define CODA_CLOSE	5
+#define CODA_IOCTL	6
+#define CODA_GETATTR	7
+#define CODA_SETATTR	8
+#define CODA_ACCESS	9
+#define CODA_LOOKUP	10
+#define CODA_CREATE	11
+#define CODA_REMOVE	12
+#define CODA_LINK	13
+#define CODA_RENAME	14
+#define CODA_MKDIR	15
+#define CODA_RMDIR	16
+#define CODA_READDIR	17
+#define CODA_SYMLINK	18
+#define CODA_READLINK	19
+#define CODA_FSYNC	20
+#define CODA_INACTIVE	21
+#define CODA_VGET	22
+#define CODA_SIGNAL	23
+#define CODA_REPLACE	24
+#define CODA_FLUSH       25
+#define CODA_PURGEUSER   26
+#define CODA_ZAPFILE     27
+#define CODA_ZAPDIR      28
+#define CODA_PURGEFID    30
+#define CODA_OPEN_BY_PATH 31
+#define CODA_RESOLVE     32
+#define CODA_REINTEGRATE 33
+#define CODA_STATFS	 34
+#define CODA_NCALLS 35
+/* DOWNCALL(op): non-zero when op lies in [CODA_REPLACE, CODA_PURGEFID].  The argument is parenthesized so any expression is safe, but it is evaluated twice. */
+#define DOWNCALL(opcode) ((opcode) >= CODA_REPLACE && (opcode) <= CODA_PURGEFID)
+/* VC_MAXMSGSIZE is parenthesized as a whole so that it composes correctly inside larger expressions. */
+#define VC_MAXDATASIZE	    8192
+#define VC_MAXMSGSIZE      (sizeof(union inputArgs)+sizeof(union outputArgs) +\
+                            VC_MAXDATASIZE)
+
+#define CIOC_KERNEL_VERSION _IOWR('c', 10, sizeof (int))
+#if	0
+	/* don't care about kernel version number */
+#define CODA_KERNEL_VERSION 0
+	/* The old venus 4.6 compatible interface */
+#define CODA_KERNEL_VERSION 1
+#endif  /* realms/cells */
+#ifdef CODA_COMPAT_5
+	/* venus_lookup gets an extra parameter to aid windows.*/
+#define CODA_KERNEL_VERSION 2
+#else
+	/* 128-bit fids for realms */
+#define CODA_KERNEL_VERSION 3 
+#endif
+
+/*
+ *        Venus <-> Coda  RPC arguments
+ */
+#ifdef CODA_COMPAT_5
+struct coda_in_hdr {
+    unsigned long opcode;
+    unsigned long unique;           /* Keep multiple outstanding msgs distinct */
+    u_short pid;                    /* Common to all */
+    u_short pgid;                   /* Common to all */
+    u_short sid;                    /* Common to all */
+    struct coda_cred cred;          /* Common to all */    
+};
+#else
+struct coda_in_hdr {
+    u_int32_t opcode;
+    u_int32_t unique;	    /* Keep multiple outstanding msgs distinct */
+    pid_t pid;		    /* Common to all */
+    pid_t pgid;		    /* Common to all */
+    uid_t uid;		    /* Common to all */
+};
+#endif
+
+/* Really important that opcode and unique are 1st two fields!  NOTE(review): these are unsigned long, while the non-CODA_COMPAT_5 coda_in_hdr declares opcode/unique as u_int32_t -- confirm the width difference on LP64 is intended. */
+struct coda_out_hdr {
+    unsigned long opcode;
+    unsigned long unique;	
+    unsigned long result;
+};
+
+/* coda_root: NO_IN */
+struct coda_root_out {
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+};
+
+struct coda_root_in {
+    struct coda_in_hdr in;
+};
+
+/* coda_sync: */
+/* Nothing needed for coda_sync */
+
+/* coda_open: */
+struct coda_open_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int	flags;
+};
+
+struct coda_open_out {
+    struct coda_out_hdr oh;
+    cdev_t	dev;
+    ino_t	inode;
+};
+
+
+/* coda_close: */
+struct coda_close_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int	flags;
+};
+
+struct coda_close_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_ioctl: */
+struct coda_ioctl_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+    int	cmd;
+    int	len;
+    int	rwflag;
+    char *data;			/* Place holder for data. */
+};
+
+struct coda_ioctl_out {
+    struct coda_out_hdr oh;
+    int	len;
+    caddr_t	data;		/* Place holder for data. */
+};
+
+
+/* coda_getattr: */
+struct coda_getattr_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+};
+
+struct coda_getattr_out {
+    struct coda_out_hdr oh;
+    struct coda_vattr attr;
+};
+
+
+/* coda_setattr: NO_OUT */
+struct coda_setattr_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+    struct coda_vattr attr;
+};
+
+struct coda_setattr_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_access: NO_OUT */
+struct coda_access_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int	flags;
+};
+
+struct coda_access_out {
+    struct coda_out_hdr out;
+};
+
+
+/* lookup flags */
+#define CLU_CASE_SENSITIVE     0x01
+#define CLU_CASE_INSENSITIVE   0x02
+
+/* coda_lookup: */
+struct  coda_lookup_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int         name;		/* Place holder for data. */
+    int         flags;	
+};
+
+struct coda_lookup_out {
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+    int	vtype;
+};
+
+
+/* coda_create: */
+struct coda_create_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+    struct coda_vattr attr;
+    int excl;
+    int mode;
+    int 	name;		/* Place holder for data. */
+};
+
+struct coda_create_out {
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+    struct coda_vattr attr;
+};
+
+
+/* coda_remove: NO_OUT */
+struct coda_remove_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int name;		/* Place holder for data. */
+};
+
+struct coda_remove_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_link: NO_OUT */
+struct coda_link_in {
+    struct coda_in_hdr ih;
+    CodaFid sourceFid;          /* cnode to link *to* */
+    CodaFid destFid;            /* Directory in which to place link */
+    int tname;		/* Place holder for data. */
+};
+
+struct coda_link_out {
+    struct coda_out_hdr out;
+};
+
+
+/* coda_rename: NO_OUT */
+struct coda_rename_in {
+    struct coda_in_hdr ih;
+    CodaFid	sourceFid;
+    int 	srcname;
+    CodaFid destFid;
+    int 	destname;
+};
+
+struct coda_rename_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_mkdir: */
+struct coda_mkdir_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    struct coda_vattr attr;
+    int	   name;		/* Place holder for data. */
+};
+
+struct coda_mkdir_out {
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+    struct coda_vattr attr;
+};
+
+
+/* coda_rmdir: NO_OUT */
+struct coda_rmdir_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int name;		/* Place holder for data. */
+};
+
+struct coda_rmdir_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_readdir: */
+struct coda_readdir_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int	count;
+    int	offset;
+};
+
+struct coda_readdir_out {
+    struct coda_out_hdr oh;
+    int	size;
+    caddr_t	data;		/* Place holder for data. */
+};
+
+/* coda_symlink: NO_OUT */
+struct coda_symlink_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;          /* Directory to put symlink in */
+    int srcname;
+    struct coda_vattr attr;
+    int tname;
+};
+
+struct coda_symlink_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_readlink: */
+struct coda_readlink_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+};
+
+struct coda_readlink_out {
+    struct coda_out_hdr oh;
+    int	count;
+    caddr_t	data;		/* Place holder for data. */
+};
+
+
+/* coda_fsync: NO_OUT */
+struct coda_fsync_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+};
+
+struct coda_fsync_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_inactive: NO_OUT */
+struct coda_inactive_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+};
+
+/* coda_vget: */
+struct coda_vget_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+};
+
+struct coda_vget_out {
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+    int	vtype;
+};
+
+
+/* CODA_SIGNAL is out-of-band, doesn't need data. */
+/* CODA_INVALIDATE is a venus->kernel call */
+/* CODA_FLUSH is a venus->kernel call */
+
+/* coda_purgeuser: */
+/* CODA_PURGEUSER is a venus->kernel call */
+struct coda_purgeuser_out {
+    struct coda_out_hdr oh;
+#ifdef CODA_COMPAT_5
+    struct coda_cred cred;
+#else
+    uid_t uid;
+#endif
+};
+
+/* coda_zapfile: */
+/* CODA_ZAPFILE is a venus->kernel call */
+struct coda_zapfile_out {  
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+};
+
+/* coda_zapdir: */
+/* CODA_ZAPDIR is a venus->kernel call */	
+struct coda_zapdir_out {	  
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+};
+
+/* coda_zapnode: */
+/* CODA_ZAPVNODE is a venus->kernel call */	
+struct coda_zapvnode_out { 
+    struct coda_out_hdr oh;
+#ifdef CODA_COMPAT_5
+    struct coda_cred cred;
+#endif
+    CodaFid Fid;
+};
+
+/* coda_purgefid: */
+/* CODA_PURGEFID is a venus->kernel call */	
+struct coda_purgefid_out { 
+    struct coda_out_hdr oh;
+    CodaFid Fid;
+};
+
+/* coda_replace: */
+/* CODA_REPLACE is a venus->kernel call */	
+struct coda_replace_out { /* coda_replace is a venus->kernel call */
+     struct coda_out_hdr oh;
+    CodaFid NewFid;
+    CodaFid OldFid;
+};
+
+/* coda_open_by_fd: */
+struct coda_open_by_fd_in {
+    struct coda_in_hdr ih;
+    CodaFid Fid;
+    int	flags;
+};
+
+struct coda_open_by_fd_out {
+    struct coda_out_hdr oh;
+    int fd;
+    struct vnode *vp;
+};
+
+/* coda_open_by_path: */
+struct coda_open_by_path_in {
+    struct coda_in_hdr ih;
+    CodaFid	Fid;
+    int	flags;
+};
+
+struct coda_open_by_path_out {
+    struct coda_out_hdr oh;
+    int path;
+};
+
+/* coda_statfs: NO_IN */
+struct coda_statfs_in {
+    struct coda_in_hdr ih;
+};
+
+struct coda_statfs_out {
+    struct coda_out_hdr oh;
+    struct coda_statfs stat;
+};
+
+/* 
+ * Occasionally, we don't cache the fid returned by CODA_LOOKUP. 
+ * For instance, if the fid is inconsistent. 
+ * This case is handled by setting the top bit of the type result parameter.
+ */
+#define CODA_NOCACHE          0x80000000
+
+union inputArgs {
+    struct coda_in_hdr ih;		/* NB: every struct below begins with an ih */
+    struct coda_open_in coda_open;
+    struct coda_close_in coda_close;
+    struct coda_ioctl_in coda_ioctl;
+    struct coda_getattr_in coda_getattr;
+    struct coda_setattr_in coda_setattr;
+    struct coda_access_in coda_access;
+    struct coda_lookup_in coda_lookup;
+    struct coda_create_in coda_create;
+    struct coda_remove_in coda_remove;
+    struct coda_link_in coda_link;
+    struct coda_rename_in coda_rename;
+    struct coda_mkdir_in coda_mkdir;
+    struct coda_rmdir_in coda_rmdir;
+    struct coda_readdir_in coda_readdir;
+    struct coda_symlink_in coda_symlink;
+    struct coda_readlink_in coda_readlink;
+    struct coda_fsync_in coda_fsync;
+    struct coda_vget_in coda_vget;
+    struct coda_open_by_fd_in coda_open_by_fd;
+    struct coda_open_by_path_in coda_open_by_path;
+    struct coda_statfs_in coda_statfs;
+};
+
+union outputArgs {
+    struct coda_out_hdr oh;		/* NB: every struct below begins with an oh */
+    struct coda_root_out coda_root;
+    struct coda_open_out coda_open;
+    struct coda_ioctl_out coda_ioctl;
+    struct coda_getattr_out coda_getattr;
+    struct coda_lookup_out coda_lookup;
+    struct coda_create_out coda_create;
+    struct coda_mkdir_out coda_mkdir;
+    struct coda_readdir_out coda_readdir;
+    struct coda_readlink_out coda_readlink;
+    struct coda_vget_out coda_vget;
+    struct coda_purgeuser_out coda_purgeuser;
+    struct coda_zapfile_out coda_zapfile;
+    struct coda_zapdir_out coda_zapdir;
+    struct coda_zapvnode_out coda_zapvnode;
+    struct coda_purgefid_out coda_purgefid;
+    struct coda_replace_out coda_replace;
+    struct coda_open_by_fd_out coda_open_by_fd;
+    struct coda_open_by_path_out coda_open_by_path;
+    struct coda_statfs_out coda_statfs;
+};    
+
+union coda_downcalls {
+    /* CODA_INVALIDATE is a venus->kernel call */
+    /* CODA_FLUSH is a venus->kernel call */
+    struct coda_purgeuser_out purgeuser;
+    struct coda_zapfile_out zapfile;
+    struct coda_zapdir_out zapdir;
+    struct coda_zapvnode_out zapvnode;
+    struct coda_purgefid_out purgefid;
+    struct coda_replace_out replace;
+};
+
+
+/*
+ * Used for identifying usage of "Control" and pioctls
+ */
+
+#define PIOCPARM_MASK 0x0000ffff
+struct ViceIoctl {
+        caddr_t in, out;        /* Data to be transferred in, or out */
+        short in_size;          /* Size of input buffer <= 2K */
+        short out_size;         /* Maximum size of output buffer, <= 2K */
+};
+
+#if defined(__CYGWIN32__) || defined(DJGPP)
+struct PioctlData {
+	unsigned long cmd;
+        const char *path;
+        int follow;
+        struct ViceIoctl vi;
+};
+#else
+struct PioctlData {
+        const char *path;
+        int follow;
+        struct ViceIoctl vi;
+};
+#endif
+
+#define	CODA_CONTROL		".CONTROL"
+#define CODA_CONTROLLEN           8
+#define CTL_INO                 -1
+#define	CTL_FILE		"/coda/.CONTROL"
+
+#ifdef CODA_COMPAT_5
+#define CTL_FID			{ -1, -1, -1 }
+#define IS_CTL_FID(fidp)	((fidp)->Volume == -1 &&\
+				 (fidp)->Vnode == -1 &&\
+				 (fidp)->Unique == -1)
+#define INVAL_FID		{ 0, 0, 0 }
+#else
+#define	CTL_FID			{ { -1, -1, -1, -1 } }
+#define	IS_CTL_FID(fidp)	((fidp)->opaque[0] == -1 &&\
+				 (fidp)->opaque[1] == -1 &&\
+				 (fidp)->opaque[2] == -1 &&\
+				 (fidp)->opaque[3] == -1)
+#define	INVAL_FID		{ { 0, 0, 0, 0 } }
+#endif
+
+/* Data passed to mount */
+
+#define CODA_MOUNT_VERSION 1
+
+struct coda_mount_data {
+	int		version;
+	int		fd;       /* Opened device */
+};
+
+#endif 
+
--- /dev/null
+++ sys/fs/coda/coda_psdev.h
@@ -0,0 +1,39 @@
+/*-
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $FreeBSD: src/sys/fs/coda/coda_psdev.h,v 1.6 2007/07/12 20:40:37 rwatson Exp $
+ * 
+ */
+
+int vc_nb_open(struct cdev *dev, int flag, int mode, struct thread *p);
+int vc_nb_close (struct cdev *dev, int flag, int mode, struct thread *p);
+int vc_nb_read(struct cdev *dev, struct uio *uiop, int flag);
+int vc_nb_write(struct cdev *dev, struct uio *uiop, int flag);
+int vc_nb_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *p);
+int vc_nb_poll(struct cdev *dev, int events, struct thread *p);
--- /dev/null
+++ sys/fs/coda/README
@@ -0,0 +1,62 @@
+$FreeBSD: src/sys/fs/coda/README,v 1.4 2007/07/12 20:40:37 rwatson Exp $
+
+                Announcing the Availability of the
+                        Coda Distributed
+                           Filesystem
+                              for
+                         BSD Unix Systems
+
+        Coda is a distributed filesystem like NFS and AFS.  It is
+freely available, like NFS.  But it functions much like AFS in being a
+"stateful" filesystem.  Coda and AFS cache files on your local
+machine to improve performance.  But Coda goes a step further than AFS
+by letting you access the cached files when no network is available,
+e.g. on disconnected laptops or during network outages.  In Coda, both
+the client and server are outside the kernel which makes them easier
+to experiment with.
+
+To get more information on Coda, I would like to refer people to
+        http://www.coda.cs.cmu.edu
+There is a wealth of documents, papers, and theses there.  There is
+also a good introduction to the Coda File System in
+        http://www.coda.cs.cmu.edu/ljpaper/lj.html
+
+Coda was originally developed as an academic prototype/testbed.  It is
+being polished and rewritten where necessary.  Coda is a work in
+progress and does have bugs.  It is, though, very usable.  Our
+interest is in making Coda available to as many people as possible and
+to have Coda evolve and flourish.
+
+The bulk of the Coda filesystem code supports the Coda client
+program, the Coda server program and the utilities needed by both.
+All these programs are unix programs and can run equally well on any
+Unix platform.  Our main development thrust is improving these
+programs.  There is a small part of Coda that deals with the kernel to
+filesystem interface.  This code is OS specific (but should not be
+platform specific).
+
+Coda is currently available for several OS's and platforms:
+        FreeBSD-2.2.5: i386
+        FreeBSD-2.2.6: i386
+	FreeBSD -current: i386
+        Linux 2.0: i386 & sparc
+        Linux 2.1: i386 & sparc
+        NetBSD 1.3: i386
+	NetBSD -current: i386
+The relevant sources, binaries, and docs can be found in
+        ftp://ftp.coda.cs.cmu.edu/pub/coda/
+
+We intend to come out with new Coda releases often, not daily.  We
+don't want to slight any OS/platform not mentioned above.  We are just
+limited in our resources as to what we can support internally.  We
+will be happy to integrate OpenBSD support as well as other OS
+support.  Also, adding platform support should be relatively easy and
+we can discuss this.  The only difficulty is that Coda has a lightweight
+process package.  It does some manipulations in assembler which would
+have to be redone for a different platform.
+
+There are several mailing lists @coda.cs.cmu.edu that discuss coda:
+coda-announce and linux-coda.  We are going to revise linux-coda to be
+OS neutral, since it is mainly Coda we want to discuss.  We appreciate
+comments, feedback, bug reports, bug fixes, enhancements, etc.
+
Index: null_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nullfs/null_vfsops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/nullfs/null_vfsops.c -L sys/fs/nullfs/null_vfsops.c -u -r1.2 -r1.3
--- sys/fs/nullfs/null_vfsops.c
+++ sys/fs/nullfs/null_vfsops.c
@@ -32,7 +32,7 @@
  *	@(#)null_vfsops.c	8.2 (Berkeley) 1/21/94
  *
  * @(#)lofs_vfsops.c	1.2 (Berkeley) 6/18/92
- * $FreeBSD: src/sys/fs/nullfs/null_vfsops.c,v 1.72.2.3 2006/03/13 03:05:21 jeff Exp $
+ * $FreeBSD: src/sys/fs/nullfs/null_vfsops.c,v 1.83 2007/05/29 11:28:28 rwatson Exp $
  */
 
 /*
@@ -42,7 +42,6 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -53,10 +52,9 @@
 
 #include <fs/nullfs/null.h>
 
-static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure");
+static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
 
 static vfs_fhtovp_t	nullfs_fhtovp;
-static vfs_checkexp_t	nullfs_checkexp;
 static vfs_mount_t	nullfs_mount;
 static vfs_quotactl_t	nullfs_quotactl;
 static vfs_root_t	nullfs_root;
@@ -64,7 +62,6 @@
 static vfs_statfs_t	nullfs_statfs;
 static vfs_unmount_t	nullfs_unmount;
 static vfs_vget_t	nullfs_vget;
-static vfs_vptofh_t	nullfs_vptofh;
 static vfs_extattrctl_t	nullfs_extattrctl;
 
 /*
@@ -89,8 +86,13 @@
 	 * Update is a no-op
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
-		return (EOPNOTSUPP);
-		/* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, path, data, ndp, td);*/
+		/*
+		 * Only support update mounts for NFS export.
+		 */
+		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
+			return (0);
+		else
+			return (EOPNOTSUPP);
 	}
 
 	/*
@@ -175,9 +177,14 @@
 	 */
 	VOP_UNLOCK(vp, 0, td);
 
-	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
+		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_LOCAL;
+		MNT_IUNLOCK(mp);
+	}
+	MNT_ILOCK(mp);
 	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag & MNTK_MPSAFE;
+	MNT_IUNLOCK(mp);
 	mp->mnt_data = (qaddr_t) xmp;
 	vfs_getnewfsid(mp);
 
@@ -240,11 +247,8 @@
 	VREF(vp);
 
 #ifdef NULLFS_DEBUG
-	if (VOP_ISLOCKED(vp, NULL)) {
-		kdb_enter("root vnode is locked.\n");
-		vrele(vp);
-		return (EDEADLK);
-	}
+	if (VOP_ISLOCKED(vp, NULL))
+		panic("root vnode is locked.\n");
 #endif
 	vn_lock(vp, flags | LK_RETRY, td);
 	*vpp = vp;
@@ -256,7 +260,7 @@
 	struct mount *mp;
 	int cmd;
 	uid_t uid;
-	caddr_t arg;
+	void *arg;
 	struct thread *td;
 {
 	return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, td);
@@ -335,29 +339,6 @@
 	return (null_nodeget(mp, *vpp, vpp));
 }
 
-static int
-nullfs_checkexp(mp, nam, extflagsp, credanonp)
-	struct mount *mp;
-	struct sockaddr *nam;
-	int *extflagsp; 
-	struct ucred **credanonp;
-{
-
-	return VFS_CHECKEXP(MOUNTTONULLMOUNT(mp)->nullm_vfs, nam, 
-		extflagsp, credanonp);
-}
-
-static int
-nullfs_vptofh(vp, fhp)
-	struct vnode *vp;
-	struct fid *fhp;
-{
-	struct vnode *lvp;
-
-	lvp = NULLVPTOLOWERVP(vp);
-	return VFS_VPTOFH(lvp, fhp);
-}
-
 static int                        
 nullfs_extattrctl(mp, cmd, filename_vp, namespace, attrname, td)
 	struct mount *mp;
@@ -373,7 +354,6 @@
 
 
 static struct vfsops null_vfsops = {
-	.vfs_checkexp =		nullfs_checkexp,
 	.vfs_extattrctl =	nullfs_extattrctl,
 	.vfs_fhtovp =		nullfs_fhtovp,
 	.vfs_init =		nullfs_init,
@@ -385,7 +365,6 @@
 	.vfs_uninit =		nullfs_uninit,
 	.vfs_unmount =		nullfs_unmount,
 	.vfs_vget =		nullfs_vget,
-	.vfs_vptofh =		nullfs_vptofh,
 };
 
 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK);
Index: null_subr.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nullfs/null_subr.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/nullfs/null_subr.c -L sys/fs/nullfs/null_subr.c -u -r1.2 -r1.3
--- sys/fs/nullfs/null_subr.c
+++ sys/fs/nullfs/null_subr.c
@@ -31,7 +31,7 @@
  *
  *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
  *
- * $FreeBSD: src/sys/fs/nullfs/null_subr.c,v 1.48.2.1 2006/03/13 03:05:17 jeff Exp $
+ * $FreeBSD: src/sys/fs/nullfs/null_subr.c,v 1.51.2.1 2007/10/22 05:44:07 daichi Exp $
  */
 
 #include <sys/param.h>
@@ -64,8 +64,8 @@
 static u_long null_node_hash;
 struct mtx null_hashmtx;
 
-static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
+static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
+MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
 
 static struct vnode * null_hashget(struct mount *, struct vnode *);
 static struct vnode * null_hashins(struct mount *, struct null_node *);
@@ -185,6 +185,18 @@
 	return (NULLVP);
 }
 
+static void
+null_insmntque_dtr(struct vnode *vp, void *xp)	/* handed to insmntque1() by null_nodeget together with xp; presumably invoked when that insertion fails -- confirm */
+{
+	vp->v_data = NULL;	/* clear the private data pointer before xp is freed */
+	vp->v_vnlock = &vp->v_lock;	/* point the lock back at the vnode's own v_lock */
+	FREE(xp, M_NULLFSNODE);	/* xp is released under the M_NULLFSNODE malloc type */
+	vp->v_op = &dead_vnodeops;	/* switch the vnode over to the dead vnode operations */
+	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);	/* return value deliberately ignored */
+	vgone(vp);
+	vput(vp);
+}
+
 /*
  * Make a new or get existing nullfs node.
  * Vp is the alias vnode, lowervp is the lower vnode.
@@ -239,6 +251,9 @@
 	vp->v_vnlock = lowervp->v_vnlock;
 	if (vp->v_vnlock == NULL)
 		panic("null_nodeget: Passed a NULL vnlock.\n");
+	error = insmntque1(vp, mp, null_insmntque_dtr, xp);
+	if (error != 0)
+		return (error);
 	/*
 	 * Atomically insert our new node into the hash or vget existing 
 	 * if someone else has beaten us to it.
@@ -283,6 +298,7 @@
 	char *fil;
 	int lno;
 {
+	int interlock = 0;
 	struct null_node *a = VTONULL(vp);
 #ifdef notyet
 	/*
@@ -306,6 +322,10 @@
 		while (null_checkvp_barrier) /*WAIT*/ ;
 		panic("null_checkvp");
 	}
+	if (mtx_owned(VI_MTX(vp)) != 0) {
+		VI_UNLOCK(vp);
+		interlock = 1;
+	}
 	if (vrefcnt(a->null_lowervp) < 1) {
 		int i; u_long *p;
 		printf("vp = %p, unref'ed lowervp\n", (void *)vp);
@@ -316,6 +336,8 @@
 		while (null_checkvp_barrier) /*WAIT*/ ;
 		panic ("null with unref'ed lowervp");
 	};
+	if (interlock != 0)
+		VI_LOCK(vp);
 #ifdef notyet
 	printf("null %x/%d -> %x/%d [%s, %d]\n",
 	        NULLTOV(a), vrefcnt(NULLTOV(a)),
Index: null_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/nullfs/null_vnops.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/fs/nullfs/null_vnops.c -L sys/fs/nullfs/null_vnops.c -u -r1.2 -r1.3
--- sys/fs/nullfs/null_vnops.c
+++ sys/fs/nullfs/null_vnops.c
@@ -36,7 +36,7 @@
  *	...and...
  *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
  *
- * $FreeBSD: src/sys/fs/nullfs/null_vnops.c,v 1.87.2.3 2006/03/13 03:05:26 jeff Exp $
+ * $FreeBSD: src/sys/fs/nullfs/null_vnops.c,v 1.95.2.1 2007/10/22 05:44:07 daichi Exp $
  */
 
 /*
@@ -180,7 +180,6 @@
 #include <sys/namei.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
-#include <sys/kdb.h>
 
 #include <fs/nullfs/null.h>
 
@@ -510,7 +509,7 @@
  * vnodes below us on the stack.
  */
 static int
-null_lock(struct vop_lock_args *ap)
+null_lock(struct vop_lock1_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int flags = ap->a_flags;
@@ -587,20 +586,33 @@
 {
 	struct vnode *vp = ap->a_vp;
 	int flags = ap->a_flags;
+	int mtxlkflag = 0;
 	struct thread *td = ap->a_td;
 	struct null_node *nn;
 	struct vnode *lvp;
 	int error;
 
-	if ((flags & LK_INTERLOCK) != 0) {
-		VI_UNLOCK(vp);
-		ap->a_flags = flags &= ~LK_INTERLOCK;
+	if ((flags & LK_INTERLOCK) != 0)
+		mtxlkflag = 1;
+	else if (mtx_owned(VI_MTX(vp)) == 0) {
+		VI_LOCK(vp);
+		mtxlkflag = 2;
 	}
 	nn = VTONULL(vp);
-	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL)
+	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
+		VI_LOCK_FLAGS(lvp, MTX_DUPOK);
+		flags |= LK_INTERLOCK;
+		vholdl(lvp);
+		VI_UNLOCK(vp);
 		error = VOP_UNLOCK(lvp, flags, td);
-	else
+		vdrop(lvp);
+		if (mtxlkflag == 0)
+			VI_LOCK(vp);
+	} else {
+		if (mtxlkflag == 2)
+			VI_UNLOCK(vp);
 		error = vop_stdunlock(ap);
+	}
 
 	return (error);
 }
@@ -708,6 +720,15 @@
 	return (0);
 }
 
+static int
+null_vptofh(struct vop_vptofh_args *ap)
+{
+	struct vnode *lvp;
+
+	lvp = NULLVPTOLOWERVP(ap->a_vp);
+	return VOP_VPTOFH(lvp, ap->a_fhp);
+}
+
 /*
  * Global vfs data structures
  */
@@ -719,7 +740,7 @@
 	.vop_getwritemount =	null_getwritemount,
 	.vop_inactive =		null_inactive,
 	.vop_islocked =		null_islocked,
-	.vop_lock =		null_lock,
+	.vop_lock1 =		null_lock,
 	.vop_lookup =		null_lookup,
 	.vop_open =		null_open,
 	.vop_print =		null_print,
@@ -728,4 +749,5 @@
 	.vop_setattr =		null_setattr,
 	.vop_strategy =		VOP_EOPNOTSUPP,
 	.vop_unlock =		null_unlock,
+	.vop_vptofh =		null_vptofh,
 };
Index: union_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/unionfs/union_vnops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/unionfs/union_vnops.c -L sys/fs/unionfs/union_vnops.c -u -r1.1.1.1 -r1.2
--- sys/fs/unionfs/union_vnops.c
+++ sys/fs/unionfs/union_vnops.c
@@ -1,7 +1,10 @@
 /*-
  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
  * Copyright (c) 1992, 1993, 1994, 1995
- *	The Regents of the University of California.  All rights reserved.
+ *      The Regents of the University of California.
+ * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
+ * Copyright (c) 2006 Daichi Goto <daichi at freebsd.org>
+ * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Jan-Simon Pendry.
@@ -31,797 +34,638 @@
  * SUCH DAMAGE.
  *
  *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
- * $FreeBSD: src/sys/fs/unionfs/union_vnops.c,v 1.132 2005/05/03 11:05:33 jeff Exp $
+ * $FreeBSD: src/sys/fs/unionfs/union_vnops.c,v 1.142.2.7 2007/10/22 05:41:54 daichi Exp $
+ *
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/fcntl.h>
-#include <sys/stat.h>
+#include <sys/conf.h>
 #include <sys/kernel.h>
-#include <sys/vnode.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/mount.h>
+#include <sys/mutex.h>
 #include <sys/namei.h>
-#include <sys/malloc.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/kdb.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/dirent.h>
+#include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
-#include <sys/lock.h>
-#include <sys/sysctl.h>
-#include <sys/unistd.h>
-#include <sys/acl.h>
-#include <sys/event.h>
-#include <sys/extattr.h>
-#include <sys/mac.h>
+
 #include <fs/unionfs/union.h>
 
 #include <vm/vm.h>
-#include <vm/vnode_pager.h>
-
-#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
 #include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
 
-int uniondebug = 0;
-
-#if UDEBUG_ENABLED
-SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
+#if 0
+#define UNIONFS_INTERNAL_DEBUG(msg, args...)    printf(msg, ## args)
+#define UNIONFS_IDBG_RENAME
 #else
-SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
+#define UNIONFS_INTERNAL_DEBUG(msg, args...)
 #endif
 
-static vop_access_t	union_access;
-static vop_aclcheck_t	union_aclcheck;
-static vop_advlock_t	union_advlock;
-static vop_close_t	union_close;
-static vop_closeextattr_t	union_closeextattr;
-static vop_create_t	union_create;
-static vop_deleteextattr_t	union_deleteextattr;
-static vop_fsync_t	union_fsync;
-static vop_getacl_t	union_getacl;
-static vop_getattr_t	union_getattr;
-static vop_getextattr_t	union_getextattr;
-static vop_inactive_t	union_inactive;
-static vop_ioctl_t	union_ioctl;
-static vop_lease_t	union_lease;
-static vop_link_t	union_link;
-static vop_listextattr_t	union_listextattr;
-static vop_lookup_t	union_lookup;
-static int	union_lookup1(struct vnode *udvp, struct vnode **dvp,
-				   struct vnode **vpp,
-				   struct componentname *cnp);
-static vop_mkdir_t	union_mkdir;
-static vop_mknod_t	union_mknod;
-static vop_open_t	union_open;
-static vop_openextattr_t	union_openextattr;
-static vop_pathconf_t	union_pathconf;
-static vop_print_t	union_print;
-static vop_read_t	union_read;
-static vop_readdir_t	union_readdir;
-static vop_readlink_t	union_readlink;
-static vop_getwritemount_t	union_getwritemount;
-static vop_reclaim_t	union_reclaim;
-static vop_remove_t	union_remove;
-static vop_rename_t	union_rename;
-static vop_rmdir_t	union_rmdir;
-static vop_poll_t	union_poll;
-static vop_setacl_t	union_setacl;
-static vop_setattr_t	union_setattr;
-static vop_setlabel_t	union_setlabel;
-static vop_setextattr_t	union_setextattr;
-static vop_strategy_t	union_strategy;
-static vop_symlink_t	union_symlink;
-static vop_whiteout_t	union_whiteout;
-static vop_write_t	union_write;
-
-static __inline
-struct vnode *
-union_lock_upper(struct union_node *un, struct thread *td)
-{
-	struct vnode *uppervp;
-
-	if ((uppervp = un->un_uppervp) != NULL) {
-		VREF(uppervp);
-		vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
-	}
-	KASSERT((uppervp == NULL || vrefcnt(uppervp) > 0), ("uppervp usecount is 0"));
-	return(uppervp);
-}
-
-static __inline
-void
-union_unlock_upper(struct vnode *uppervp, struct thread *td)
-{
-	vput(uppervp);
-}
-
-static __inline
-struct vnode *
-union_lock_other(struct union_node *un, struct thread *td)
-{
-	struct vnode *vp;
-
-	if (un->un_uppervp != NULL) {
-		vp = union_lock_upper(un, td);
-	} else if ((vp = un->un_lowervp) != NULL) {
-		VREF(vp);
-		vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
-	}
-	return(vp);
-}
-
-static __inline
-void
-union_unlock_other(struct vnode *vp, struct thread *td)
-{
-	vput(vp);
-}
+/* lockmgr lock <-> reverse table */
+struct lk_lr_table {
+	int	lock;
+	int	revlock;
+};
+
+static struct lk_lr_table un_llt[] = {
+	{LK_SHARED, LK_RELEASE},
+	{LK_EXCLUSIVE, LK_RELEASE},
+	{LK_UPGRADE, LK_DOWNGRADE},
+	{LK_EXCLUPGRADE, LK_DOWNGRADE},
+	{LK_DOWNGRADE, LK_UPGRADE},
+	{0, 0}
+};
 
-/*
- *	union_lookup:
- *
- *	udvp	must be exclusively locked on call and will remain 
- *		exclusively locked on return.  This is the mount point 
- *		for our filesystem.
- *
- *	dvp	Our base directory, locked and referenced.
- *		The passed dvp will be dereferenced and unlocked on return
- *		and a new dvp will be returned which is locked and 
- *		referenced in the same variable.
- *
- *	vpp	is filled in with the result if no error occured,
- *		locked and ref'd.
- *
- *		If an error is returned, *vpp is set to NULLVP.  If no
- *		error occurs, *vpp is returned with a reference and an
- *		exclusive lock.
- */
 
 static int
-union_lookup1(udvp, pdvp, vpp, cnp)
-	struct vnode *udvp;
-	struct vnode **pdvp;
-	struct vnode **vpp;
-	struct componentname *cnp;
+unionfs_lookup(struct vop_cachedlookup_args *ap)
 {
-	int error;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *dvp = *pdvp;
-	struct vnode *tdvp;
-	struct mount *mp;
+	int		iswhiteout;
+	int		lockflag;
+	int		error , uerror, lerror;
+	u_long		nameiop;
+	u_long		cnflags, cnflagsbk;
+	struct unionfs_node *dunp;
+	struct vnode   *dvp, *udvp, *ldvp, *vp, *uvp, *lvp, *dtmpvp;
+	struct vattr	va;
+	struct componentname *cnp;
+	struct thread  *td;
 
-	/*
-	 * If stepping up the directory tree, check for going
-	 * back across the mount point, in which case do what
-	 * lookup would do by stepping back down the mount
-	 * hierarchy.
-	 */
-	if (cnp->cn_flags & ISDOTDOT) {
-		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
-			/*
-			 * Don't do the NOCROSSMOUNT check
-			 * at this level.  By definition,
-			 * union fs deals with namespaces, not
-			 * filesystems.
-			 */
-			tdvp = dvp;
-			dvp = dvp->v_mount->mnt_vnodecovered;
-			VREF(dvp);
-			vput(tdvp);
-			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
-		}
-	}
+	iswhiteout = 0;
+	lockflag = 0;
+	error = uerror = lerror = ENOENT;
+	cnp = ap->a_cnp;
+	nameiop = cnp->cn_nameiop;
+	cnflags = cnp->cn_flags;
+	dvp = ap->a_dvp;
+	dunp = VTOUNIONFS(dvp);
+	udvp = dunp->un_uppervp;
+	ldvp = dunp->un_lowervp;
+	vp = uvp = lvp = NULLVP;
+	td = curthread;
+	*(ap->a_vpp) = NULLVP;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_lookup: enter: nameiop=%ld, flags=%lx, path=%s\n", nameiop, cnflags, cnp->cn_nameptr);
 
-	/*
-	 * Set return dvp to be the upperdvp 'parent directory.
-	 */
-	*pdvp = dvp;
+	if (dvp->v_type != VDIR)
+		return (ENOTDIR);
 
 	/*
-	 * If the VOP_LOOKUP() call generates an error, tdvp is invalid and
-	 * no changes will have been made to dvp, so we are set to return.
+	 * If the mount is read-only and the op is not LOOKUP, return EROFS.
 	 */
-
-        error = VOP_LOOKUP(dvp, &tdvp, cnp);
-	if (error) {
-		UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
-		*vpp = NULL;
-		return (error);
-	}
-	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));
+	if ((cnflags & ISLASTCN) &&
+	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    LOOKUP != nameiop)
+		return (EROFS);
 
 	/*
-	 * Lastly check if the current node is a mount point in
-	 * which case walk up the mount hierarchy making sure not to
-	 * bump into the root of the mount tree (ie. dvp != udvp).
-	 *
-	 * We use dvp as a temporary variable here, it is no longer related
-	 * to the dvp above.  However, we have to ensure that both *pdvp and
-	 * tdvp are locked on return.
+	 * lookup dotdot
 	 */
+	if (cnflags & ISDOTDOT) {
+		if (LOOKUP != nameiop && udvp == NULLVP)
+			return (EROFS);
 
-	dvp = tdvp;
-	while (
-	    dvp != udvp && 
-	    (dvp->v_type == VDIR) &&
-	    (mp = dvp->v_mountedhere)
-	) {
-		int relock_pdvp = 0;
-
-		if (vfs_busy(mp, 0, 0, td))
-			continue;
-
-		if (dvp == *pdvp)
-			relock_pdvp = 1;
-		vput(dvp);
-		dvp = NULL;
-		error = VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
-
-		vfs_unbusy(mp, td);
-
-		if (relock_pdvp)
-			vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, td);
-
-		if (error) {
-			*vpp = NULL;
-			return (error);
+		if (udvp != NULLVP) {
+			dtmpvp = udvp;
+			if (ldvp != NULLVP)
+				VOP_UNLOCK(ldvp, 0, td);
 		}
-	}
-	*vpp = dvp;
-	return (0);
-}
+		else
+			dtmpvp = ldvp;
 
-static int
-union_lookup(ap)
-	struct vop_lookup_args /* {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-	} */ *ap;
-{
-	int error;
-	int uerror, lerror;
-	struct vnode *uppervp, *lowervp;
-	struct vnode *upperdvp, *lowerdvp;
-	struct vnode *dvp = ap->a_dvp;		/* starting dir */
-	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
-	struct ucred *saved_cred = NULL;
-	int iswhiteout;
-	struct vattr va;
+		error = VOP_LOOKUP(dtmpvp, &vp, cnp);
 
-	*ap->a_vpp = NULLVP;
+		if (dtmpvp == udvp && ldvp != NULLVP) {
+			VOP_UNLOCK(udvp, 0, td);
+			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
+		}
 
-	/*
-	 * Disallow write attempts to the filesystem mounted read-only.
-	 */
-	if ((cnp->cn_flags & ISLASTCN) && 
-	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
-	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
-		return (EROFS);
-	}
+		if (error == 0) {
+			/*
+			 * Exchange the lock and reference from vp to
+			 * dunp->un_dvp: vp is the upper/lower vnode, but
+			 * the unionfs vnode is what must be returned.
+			 */
+			if (nameiop == DELETE  || nameiop == RENAME ||
+			    (cnp->cn_lkflags & LK_TYPE_MASK))
+				VOP_UNLOCK(vp, 0, td);
+			vrele(vp);
+
+			VOP_UNLOCK(dvp, 0, td);
+			*(ap->a_vpp) = dunp->un_dvp;
+			vref(dunp->un_dvp);
+
+			if (nameiop == DELETE || nameiop == RENAME)
+				vn_lock(dunp->un_dvp, LK_EXCLUSIVE | LK_RETRY, td);
+			else if (cnp->cn_lkflags & LK_TYPE_MASK)
+				vn_lock(dunp->un_dvp, cnp->cn_lkflags | LK_RETRY, td);
 
-	/*
-	 * For any lookups we do, always return with the parent locked.
-	 */
-	cnp->cn_flags |= LOCKPARENT;
+			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
+		} else if (error == ENOENT && (cnflags & MAKEENTRY) &&
+		    nameiop != CREATE)
+			cache_enter(dvp, NULLVP, cnp);
 
-	lowerdvp = dun->un_lowervp;
-	uppervp = NULLVP;
-	lowervp = NULLVP;
-	iswhiteout = 0;
+		UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error);
 
-	uerror = ENOENT;
-	lerror = ENOENT;
+		return (error);
+	}
 
 	/*
-	 * Get a private lock on uppervp and a reference, effectively 
-	 * taking it out of the union_node's control.
-	 *
-	 * We must lock upperdvp while holding our lock on dvp
-	 * to avoid a deadlock.
+	 * lookup upper layer
 	 */
-	upperdvp = union_lock_upper(dun, td);
+	if (udvp != NULLVP) {
+		uerror = VOP_LOOKUP(udvp, &uvp, cnp);
 
-	/*
-	 * Do the lookup in the upper level.
-	 * If that level consumes additional pathnames,
-	 * then assume that something special is going
-	 * on and just return that vnode.
-	 */
-	if (upperdvp != NULLVP) {
-		/*
-		 * We do not have to worry about the DOTDOT case, we've
-		 * already unlocked dvp.
-		 */
-		UDEBUG(("A %p\n", upperdvp));
+		if (uerror == 0) {
+			if (udvp == uvp) {	/* is dot */
+				vrele(uvp);
+				*(ap->a_vpp) = dvp;
+				vref(dvp);
 
-		/*
-		 * Do the lookup.   We must supply a locked and referenced
-		 * upperdvp to the function and will get a new locked and
-		 * referenced upperdvp back, with the old having been 
-		 * dereferenced.
-		 *
-		 * If an error is returned, uppervp will be NULLVP.  If no
-		 * error occurs, uppervp will be the locked and referenced.
-		 * Return vnode, or possibly NULL, depending on what is being
-		 * requested.  It is possible that the returned uppervp
-		 * will be the same as upperdvp.
-		 */
-		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
-		UDEBUG((
-		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
-		    uerror,
-		    upperdvp,
-		    vrefcnt(upperdvp),
-		    VOP_ISLOCKED(upperdvp, NULL),
-		    uppervp,
-		    (uppervp ? vrefcnt(uppervp) : -99),
-		    (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99)
-		));
+				UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", uerror);
 
-		/*
-		 * Disallow write attempts to the filesystem mounted read-only.
-		 */
-		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
-		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
-		    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
-			error = EROFS;
-			goto out;
-		}
-
-		/*
-		 * Special case: If cn_consume != 0 then skip out.  The result
-		 * of the lookup is transfered to our return variable.  If
-		 * an error occured we have to throw away the results.
-		 */
-
-		if (cnp->cn_consume != 0) {
-			if ((error = uerror) == 0) {
-				*ap->a_vpp = uppervp;
-				uppervp = NULL;
+				return (uerror);
 			}
-			goto out;
+			if (nameiop == DELETE || nameiop == RENAME ||
+			    (cnp->cn_lkflags & LK_TYPE_MASK))
+				VOP_UNLOCK(uvp, 0, td);
 		}
 
-		/*
-		 * Calculate whiteout, fall through.
-		 */
-
-		if (uerror == ENOENT || uerror == EJUSTRETURN) {
-			if (cnp->cn_flags & ISWHITEOUT) {
-				iswhiteout = 1;
-			} else if (lowerdvp != NULLVP) {
-				int terror;
-
-				terror = VOP_GETATTR(upperdvp, &va,
-					cnp->cn_cred, cnp->cn_thread);
-				if (terror == 0 && (va.va_flags & OPAQUE))
-					iswhiteout = 1;
-			}
-		}
+		/* check whiteout */
+		if (uerror == ENOENT || uerror == EJUSTRETURN)
+			if (cnp->cn_flags & ISWHITEOUT)
+				iswhiteout = 1;	/* don't lookup lower */
+		if (iswhiteout == 0 && ldvp != NULLVP)
+			if (VOP_GETATTR(udvp, &va, cnp->cn_cred, td) == 0 &&
+			    (va.va_flags & OPAQUE))
+				iswhiteout = 1;	/* don't lookup lower */
+#if 0
+		UNIONFS_INTERNAL_DEBUG("unionfs_lookup: debug: whiteout=%d, path=%s\n", iswhiteout, cnp->cn_nameptr);
+#endif
 	}
 
 	/*
-	 * In a similar way to the upper layer, do the lookup
-	 * in the lower layer.   This time, if there is some
-	 * component magic going on, then vput whatever we got
-	 * back from the upper layer and return the lower vnode
-	 * instead.
+	 * lookup lower layer
 	 */
-
-	if (lowerdvp != NULLVP && !iswhiteout) {
-		int nameiop;
-
-		UDEBUG(("B %p\n", lowerdvp));
-
-		/*
-		 * Force only LOOKUPs on the lower node, since
-		 * we won't be making changes to it anyway.
-		 */
-		nameiop = cnp->cn_nameiop;
+	if (ldvp != NULLVP && !(cnflags & DOWHITEOUT) && iswhiteout == 0) {
+		/* the op is always LOOKUP */
 		cnp->cn_nameiop = LOOKUP;
-		if (um->um_op == UNMNT_BELOW) {
-			saved_cred = cnp->cn_cred;
-			cnp->cn_cred = um->um_cred;
-		}
+		cnflagsbk = cnp->cn_flags;
+		cnp->cn_flags = cnflags;
 
-		/*
-		 * We shouldn't have to worry about locking interactions
-		 * between the lower layer and our union layer (w.r.t.
-		 * `..' processing) because we don't futz with lowervp
-		 * locks in the union-node instantiation code path.
-		 *
-		 * union_lookup1() requires lowervp to be locked on entry,
-		 * and it will be unlocked on return.  The ref count will
-		 * not change.  On return lowervp doesn't represent anything
-		 * to us so we NULL it out.
-		 */
-		VREF(lowerdvp);
-		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, td);
-		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
-		if (lowerdvp == lowervp)
-			vrele(lowerdvp);
-		else
-			vput(lowerdvp);
-		lowerdvp = NULL;	/* lowerdvp invalid after vput */
+		lerror = VOP_LOOKUP(ldvp, &lvp, cnp);
 
-		if (um->um_op == UNMNT_BELOW)
-			cnp->cn_cred = saved_cred;
 		cnp->cn_nameiop = nameiop;
+		if (udvp != NULLVP && (uerror == 0 || uerror == EJUSTRETURN))
+			cnp->cn_flags = cnflagsbk;
 
-		if (cnp->cn_consume != 0 || lerror == EACCES) {
-			if ((error = lerror) == 0) {
-				*ap->a_vpp = lowervp;
-				lowervp = NULL;
-			}
-			goto out;
-		}
-	} else {
-		UDEBUG(("C %p\n", lowerdvp));
-		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
-			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
-				VREF(lowervp);
-				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, td);
-				lerror = 0;
+		if (lerror == 0) {
+			if (ldvp == lvp) {	/* is dot */
+				if (uvp != NULLVP)
+					vrele(uvp);	/* no need? */
+				vrele(lvp);
+				*(ap->a_vpp) = dvp;
+				vref(dvp);
+
+				UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", lerror);
+
+				return (lerror);
 			}
+			if (cnp->cn_lkflags & LK_TYPE_MASK)
+				VOP_UNLOCK(lvp, 0, td);
 		}
 	}
 
 	/*
-	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
-	 *
-	 * 1. If both layers returned an error, select the upper layer.
-	 *
-	 * 2. If the upper layer failed and the bottom layer succeeded,
-	 *    two subcases occur:
-	 *
-	 *	a.  The bottom vnode is not a directory, in which case
-	 *	    just return a new union vnode referencing an
-	 *	    empty top layer and the existing bottom layer.
-	 *
-	 *	b.  The bottom vnode is a directory, in which case
-	 *	    create a new directory in the top layer and
-	 *	    and fall through to case 3.
-	 *
-	 * 3. If the top layer succeeded, then return a new union
-	 *    vnode referencing whatever the new top layer and
-	 *    whatever the bottom layer returned.
+	 * check lookup result
 	 */
-
-	/* case 1. */
-	if ((uerror != 0) && (lerror != 0)) {
-		error = uerror;
-		goto out;
-	}
-
-	/* case 2. */
-	if (uerror != 0 /* && (lerror == 0) */ ) {
-		if (lowervp->v_type == VDIR) { /* case 2b. */
-			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
-			/*
-			 * Oops, uppervp has a problem, we may have to shadow.
-			 */
-			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
-			if (uerror) {
-				error = uerror;
-				goto out;
-			}
-		}
+	if (uvp == NULLVP && lvp == NULLVP) {
+		UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n",
+		    (udvp != NULLVP ? uerror : lerror));
+		return (udvp != NULLVP ? uerror : lerror);
 	}
 
 	/*
-	 * Must call union_allocvp() with both the upper and lower vnodes
-	 * referenced and the upper vnode locked.   ap->a_vpp is returned 
-	 * referenced and locked.  lowervp, uppervp, and upperdvp are 
-	 * absorbed by union_allocvp() whether it succeeds or fails.
-	 *
-	 * upperdvp is the parent directory of uppervp which may be
-	 * different, depending on the path, from dvp->un_uppervp.  That's
-	 * why it is a separate argument.  Note that it must be unlocked.
-	 *
-	 * dvp must be locked on entry to the call and will be locked on
-	 * return.
+	 * check vnode type
 	 */
+	if (uvp != NULLVP && lvp != NULLVP && uvp->v_type != lvp->v_type) {
+		vrele(lvp);
+		lvp = NULLVP;
+	}
 
-	if (uppervp && uppervp != upperdvp)
-		VOP_UNLOCK(uppervp, 0, td);
-	if (lowervp)
-		VOP_UNLOCK(lowervp, 0, td);
-	if (upperdvp)
-		VOP_UNLOCK(upperdvp, 0, td);
-
-	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
-			      uppervp, lowervp, 1);
-
-	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99));
-
-	uppervp = NULL;
-	upperdvp = NULL;
-	lowervp = NULL;
-
-	/* 
-	 *	Termination Code
-	 *
-	 *	- put away any extra junk laying around.  Note that lowervp
-	 *	  (if not NULL) will never be the same as *ap->a_vp and 
-	 *	  neither will uppervp, because when we set that state we 
-	 *	  NULL-out lowervp or uppervp.  On the otherhand, upperdvp
-	 *	  may match uppervp or *ap->a_vpp.
-	 *
-	 *	- relock/unlock dvp if appropriate.
+	/*
+	 * check shadow dir
 	 */
+	if (uerror != 0 && uerror != EJUSTRETURN && udvp != NULLVP &&
+	    lerror == 0 && lvp != NULLVP && lvp->v_type == VDIR &&
+	    !(dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (1 < cnp->cn_namelen || '.' != *(cnp->cn_nameptr))) {
+		/* get unionfs vnode in order to create a new shadow dir. */
+		error = unionfs_nodeget(dvp->v_mount, NULLVP, lvp, dvp, &vp,
+		    cnp, td);
+		if (error != 0)
+			goto unionfs_lookup_out;
 
-out:
-	if (upperdvp) {
-		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
-			vrele(upperdvp);
+		if (LK_SHARED == (cnp->cn_lkflags & LK_TYPE_MASK))
+			VOP_UNLOCK(vp, 0, td);
+		if (LK_EXCLUSIVE != VOP_ISLOCKED(vp, td)) {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+			lockflag = 1;
+		}
+		error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount),
+		    udvp, VTOUNIONFS(vp), cnp, td);
+		if (lockflag != 0)
+			VOP_UNLOCK(vp, 0, td);
+		if (error != 0) {
+			UNIONFSDEBUG("unionfs_lookup: Unable to create shadow dir.");
+			if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE)
+				vput(vp);
+			else
+				vrele(vp);
+			goto unionfs_lookup_out;
+		}
+		if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_SHARED)
+			vn_lock(vp, LK_SHARED | LK_RETRY, td);
+	}
+	/*
+	 * get unionfs vnode.
+	 */
+	else {
+		if (uvp != NULLVP)
+			error = uerror;
 		else
-			vput(upperdvp);
+			error = lerror;
+		if (error != 0)
+			goto unionfs_lookup_out;
+		error = unionfs_nodeget(dvp->v_mount, uvp, lvp, dvp, &vp,
+		    cnp, td);
+		if (error != 0) {
+			UNIONFSDEBUG("unionfs_lookup: Unable to create unionfs vnode.");
+			goto unionfs_lookup_out;
+		}
+		if ((nameiop == DELETE || nameiop == RENAME) &&
+		    (cnp->cn_lkflags & LK_TYPE_MASK) == 0)
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 	}
 
-	if (uppervp)
-		vput(uppervp);
+	*(ap->a_vpp) = vp;
 
-	if (lowervp)
-		vput(lowervp);
+	if (cnflags & MAKEENTRY)
+		cache_enter(dvp, vp, cnp);
 
-	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
-		((*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99),
-		lowervp, uppervp));
-
-	if (error == 0 || error == EJUSTRETURN) {
-		if (cnp->cn_namelen == 1 &&
-		    cnp->cn_nameptr[0] == '.' &&
-		    *ap->a_vpp != dvp) {
-#ifdef	DIAGNOSTIC
-			vprint("union_lookup: vp", *ap->a_vpp);
-			vprint("union_lookup: dvp", dvp);
-#endif
-			panic("union_lookup returning . (%p) != startdir (%p)",
-			    *ap->a_vpp, dvp);
-		}
-	}
+unionfs_lookup_out:
+	if (uvp != NULLVP)
+		vrele(uvp);
+	if (lvp != NULLVP)
+		vrele(lvp);
+
+	if (error == ENOENT && (cnflags & MAKEENTRY) && nameiop != CREATE)
+		cache_enter(dvp, NULLVP, cnp);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error);
 
 	return (error);
 }
 
-/*
- * 	union_create:
- *
- * a_dvp is locked on entry and remains locked on return.  a_vpp is returned
- * locked if no error occurs, otherwise it is garbage.
- */
-
 static int
-union_create(ap)
-	struct vop_create_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vattr *a_vap;
-	} */ *ap;
-{
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *dvp;
-	int error = EROFS;
-
-	if ((dvp = union_lock_upper(dun, td)) != NULL) {
-		struct vnode *vp;
-		struct mount *mp;
+unionfs_create(struct vop_create_args *ap)
+{
+	struct unionfs_node *dunp;
+	struct componentname *cnp;
+	struct thread  *td;
+	struct vnode   *udvp;
+	struct vnode   *vp;
+	int		error;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_create: enter\n");
+
+	dunp = VTOUNIONFS(ap->a_dvp);
+	cnp = ap->a_cnp;
+	td = curthread;
+	udvp = dunp->un_uppervp;
+	error = EROFS;
 
-		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
-		if (error == 0) {
-			mp = ap->a_dvp->v_mount;
+	if (udvp != NULLVP) {
+		if ((error = VOP_CREATE(udvp, &vp, cnp, ap->a_vap)) == 0) {
 			VOP_UNLOCK(vp, 0, td);
-			UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vrefcnt(vp)));
-			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
-				cnp, vp, NULLVP, 1);
-			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
+			error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
+			    ap->a_dvp, ap->a_vpp, cnp, td);
+			vrele(vp);
 		}
-		union_unlock_upper(dvp, td);
 	}
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_create: leave (%d)\n", error);
+
 	return (error);
 }
 
 static int
-union_whiteout(ap)
-	struct vop_whiteout_args /* {
-		struct vnode *a_dvp;
-		struct componentname *a_cnp;
-		int a_flags;
-	} */ *ap;
+unionfs_whiteout(struct vop_whiteout_args *ap)
 {
-	struct union_node *un = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	struct vnode *uppervp;
-	int error;
+	struct unionfs_node *dunp;
+	struct componentname *cnp;
+	struct vnode   *udvp;
+	int		error;
 
-	switch (ap->a_flags) {
-	case CREATE:
-	case DELETE:
-		uppervp = union_lock_upper(un, cnp->cn_thread);
-		if (uppervp != NULLVP) {
-			error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags);
-			union_unlock_upper(uppervp, cnp->cn_thread);
-		} else
-			error = EOPNOTSUPP;
-		break;
-	case LOOKUP:
-		error = EOPNOTSUPP;
-		break;
-	default:
-		panic("union_whiteout: unknown op");
+	UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: enter\n");
+
+	dunp = VTOUNIONFS(ap->a_dvp);
+	cnp = ap->a_cnp;
+	udvp = dunp->un_uppervp;
+	error = EOPNOTSUPP;
+
+	if (udvp != NULLVP) {
+		switch (ap->a_flags) {
+		case CREATE:
+		case DELETE:
+		case LOOKUP:
+			error = VOP_WHITEOUT(udvp, cnp, ap->a_flags);
+			break;
+		default:
+			error = EINVAL;
+			break;
+		}
 	}
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: leave (%d)\n", error);
+
 	return (error);
 }
 
-/*
- * 	union_mknod:
- *
- *	a_dvp is locked on entry and should remain locked on return.
- *	a_vpp is garbagre whether an error occurs or not.
- */
-
 static int
-union_mknod(ap)
-	struct vop_mknod_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vattr *a_vap;
-	} */ *ap;
-{
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	struct vnode *dvp;
-	int error = EROFS;
-
-	if ((dvp = union_lock_upper(dun, cnp->cn_thread)) != NULL) {
-		error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap);
-		union_unlock_upper(dvp, cnp->cn_thread);
+unionfs_mknod(struct vop_mknod_args *ap)
+{
+	struct unionfs_node *dunp;
+	struct componentname *cnp;
+	struct thread  *td;
+	struct vnode   *udvp;
+	struct vnode   *vp;
+	int		error;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_mknod: enter\n");
+
+	dunp = VTOUNIONFS(ap->a_dvp);
+	cnp = ap->a_cnp;
+	td = curthread;
+	udvp = dunp->un_uppervp;
+	error = EROFS;
+
+	if (udvp != NULLVP) {
+		if ((error = VOP_MKNOD(udvp, &vp, cnp, ap->a_vap)) == 0) {
+			VOP_UNLOCK(vp, 0, td);
+			error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
+			    ap->a_dvp, ap->a_vpp, cnp, td);
+			vrele(vp);
+		}
 	}
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_mknod: leave (%d)\n", error);
+
 	return (error);
 }
 
-/*
- *	union_open:
- *
- *	run open VOP.  When opening the underlying vnode we have to mimic
- *	vn_open().  What we *really* need to do to avoid screwups if the
- *	open semantics change is to call vn_open().  For example, ufs blows
- *	up if you open a file but do not vmio it prior to writing.
- */
-
 static int
-union_open(ap)
-	struct vop_open_args /* {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_vp;
-		int a_mode;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *tvp;
-	int mode = ap->a_mode;
-	struct ucred *cred = ap->a_cred;
-	struct thread *td = ap->a_td;
-	int error = 0;
-	int tvpisupper = 1;
+unionfs_open(struct vop_open_args *ap)
+{
+	int		error;
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct vnode   *targetvp;
+	struct ucred   *cred;
+	struct thread  *td;
 
-	/*
-	 * If there is an existing upper vp then simply open that.
-	 * The upper vp takes precedence over the lower vp.  When opening
-	 * a lower vp for writing copy it to the uppervp and then open the
-	 * uppervp.
-	 *
-	 * At the end of this section tvp will be left locked.
-	 */
-	if ((tvp = union_lock_upper(un, td)) == NULLVP) {
-		/*
-		 * If the lower vnode is being opened for writing, then
-		 * copy the file contents to the upper vnode and open that,
-		 * otherwise can simply open the lower vnode.
-		 */
-		tvp = un->un_lowervp;
-		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
-			int docopy = !(mode & O_TRUNC);
-			error = union_copyup(un, docopy, cred, td);
-			tvp = union_lock_upper(un, td);
+	UNIONFS_INTERNAL_DEBUG("unionfs_open: enter\n");
+
+	error = 0;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	targetvp = NULLVP;
+	cred = ap->a_cred;
+	td = ap->a_td;
+
+	unionfs_get_node_status(unp, td, &unsp);
+
+	if (unsp->uns_lower_opencnt > 0 || unsp->uns_upper_opencnt > 0) {
+		/* vnode is already opened. */
+		if (unsp->uns_upper_opencnt > 0)
+			targetvp = uvp;
+		else
+			targetvp = lvp;
+
+		if (targetvp == lvp &&
+		    (ap->a_mode & FWRITE) && lvp->v_type == VREG)
+			targetvp = NULLVP;
+	}
+	if (targetvp == NULLVP) {
+		if (uvp == NULLVP) {
+			if ((ap->a_mode & FWRITE) && lvp->v_type == VREG) {
+				error = unionfs_copyfile(unp,
+				    !(ap->a_mode & O_TRUNC), cred, td);
+				if (error != 0)
+					goto unionfs_open_abort;
+				targetvp = uvp = unp->un_uppervp;
+			} else
+				targetvp = lvp;
+		} else
+			targetvp = uvp;
+	}
+
+	error = VOP_OPEN(targetvp, ap->a_mode, cred, td, ap->a_fp);
+	if (error == 0) {
+		if (targetvp == uvp) {
+			if (uvp->v_type == VDIR && lvp != NULLVP &&
+			    unsp->uns_lower_opencnt <= 0) {
+				/* open lower for readdir */
+				error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
+				if (error != 0) {
+					VOP_CLOSE(uvp, ap->a_mode, cred, td);
+					goto unionfs_open_abort;
+				}
+				unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
+				unsp->uns_lower_opencnt++;
+			}
+			unsp->uns_upper_opencnt++;
 		} else {
-			un->un_openl++;
-			VREF(tvp);
-			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
-			tvpisupper = 0;
+			unsp->uns_lower_opencnt++;
+			unsp->uns_lower_openmode = ap->a_mode;
 		}
+		ap->a_vp->v_object = targetvp->v_object;
 	}
 
-	/*
-	 * We are holding the correct vnode, open it.
-	 */
+unionfs_open_abort:
+	if (error != 0)
+		unionfs_tryrem_node_status(unp, td, unsp);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_open: leave (%d)\n", error);
 
-	if (error == 0)
-		error = VOP_OPEN(tvp, mode, cred, td, -1);
-	if (error == 0)
-		ap->a_vp->v_object = tvp->v_object;
-	/*
-	 * Release any locks held.
-	 */
-	if (tvpisupper) {
-		if (tvp)
-			union_unlock_upper(tvp, td);
-	} else {
-		vput(tvp);
-	}
 	return (error);
 }
 
-/*
- *	union_close:
- *
- *	It is unclear whether a_vp is passed locked or unlocked.  Whatever
- *	the case we do not change it.
- */
-
 static int
-union_close(ap)
-	struct vop_close_args /* {
-		struct vnode *a_vp;
-		int  a_fflag;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
-
-	if ((vp = un->un_uppervp) == NULLVP) {
-#ifdef UNION_DIAGNOSTIC
-		if (un->un_openl <= 0)
-			panic("union: un_openl cnt");
-#endif
-		--un->un_openl;
-		vp = un->un_lowervp;
+unionfs_close(struct vop_close_args *ap)
+{
+	int		error;
+	int		locked;
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp;
+	struct ucred   *cred;
+	struct thread  *td;
+	struct vnode   *ovp;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n");
+
+	locked = 0;
+	unp = VTOUNIONFS(ap->a_vp);
+	cred = ap->a_cred;
+	td = ap->a_td;
+
+	if (VOP_ISLOCKED(ap->a_vp, td) != LK_EXCLUSIVE) {
+		vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td);
+		locked = 1;
 	}
-	ap->a_vp = vp;
-	return (VOP_CLOSE_AP(ap));
+	unionfs_get_node_status(unp, td, &unsp);
+
+	if (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0) {
+#ifdef DIAGNOSTIC
+		printf("unionfs_close: warning: open count is 0\n");
+#endif
+		if (unp->un_uppervp != NULLVP)
+			ovp = unp->un_uppervp;
+		else
+			ovp = unp->un_lowervp;
+	} else if (unsp->uns_upper_opencnt > 0)
+		ovp = unp->un_uppervp;
+	else
+		ovp = unp->un_lowervp;
+
+	error = VOP_CLOSE(ovp, ap->a_fflag, cred, td);
+
+	if (error != 0)
+		goto unionfs_close_abort;
+
+	ap->a_vp->v_object = ovp->v_object;
+
+	if (ovp == unp->un_uppervp) {
+		unsp->uns_upper_opencnt--;
+		if (unsp->uns_upper_opencnt == 0) {
+			if (unsp->uns_node_flag & UNS_OPENL_4_READDIR) {
+				VOP_CLOSE(unp->un_lowervp, FREAD, cred, td);
+				unsp->uns_node_flag &= ~UNS_OPENL_4_READDIR;
+				unsp->uns_lower_opencnt--;
+			}
+			if (unsp->uns_lower_opencnt > 0)
+				ap->a_vp->v_object = unp->un_lowervp->v_object;
+		}
+	} else
+		unsp->uns_lower_opencnt--;
+
+unionfs_close_abort:
+	unionfs_tryrem_node_status(unp, td, unsp);
+
+	if (locked != 0)
+		VOP_UNLOCK(ap->a_vp, 0, td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_close: leave (%d)\n", error);
+
+	return (error);
 }
 
 /*
- * Check access permission on the union vnode.
- * The access check being enforced is to check
- * against both the underlying vnode, and any
- * copied vnode.  This ensures that no additional
- * file permissions are given away simply because
- * the user caused an implicit file copy.
+ * Check the requested access mode against the shadow file/dir attributes.
  */
 static int
-union_access(ap)
-	struct vop_access_args /* {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_vp;
-		int a_mode;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct thread *td = ap->a_td;
-	int error = EACCES;
-	struct vnode *vp;
+unionfs_check_corrected_access(u_short mode,
+			     struct vattr *va,
+			     struct ucred *cred)
+{
+	int		count;
+	uid_t		uid;	/* upper side vnode's uid */
+	gid_t		gid;	/* upper side vnode's gid */
+	u_short		vmode;	/* upper side vnode's mode */
+	gid_t          *gp;
+	u_short		mask;
+
+	mask = 0;
+	uid = va->va_uid;
+	gid = va->va_gid;
+	vmode = va->va_mode;
+
+	/* check owner */
+	if (cred->cr_uid == uid) {
+		if (mode & VEXEC)
+			mask |= S_IXUSR;
+		if (mode & VREAD)
+			mask |= S_IRUSR;
+		if (mode & VWRITE)
+			mask |= S_IWUSR;
+		return ((vmode & mask) == mask ? 0 : EACCES);
+	}
+
+	/* check group */
+	count = 0;
+	gp = cred->cr_groups;
+	for (; count < cred->cr_ngroups; count++, gp++) {
+		if (gid == *gp) {
+			if (mode & VEXEC)
+				mask |= S_IXGRP;
+			if (mode & VREAD)
+				mask |= S_IRGRP;
+			if (mode & VWRITE)
+				mask |= S_IWGRP;
+			return ((vmode & mask) == mask ? 0 : EACCES);
+		}
+	}
+
+	/* check other */
+	if (mode & VEXEC)
+		mask |= S_IXOTH;
+	if (mode & VREAD)
+		mask |= S_IROTH;
+	if (mode & VWRITE)
+		mask |= S_IWOTH;
+
+	return ((vmode & mask) == mask ? 0 : EACCES);
+}
+
+static int
+unionfs_access(struct vop_access_args *ap)
+{
+	struct unionfs_mount *ump;
+	struct unionfs_node *unp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct thread  *td;
+	struct vattr	va;
+	int		mode;
+	int		error;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_access: enter\n");
+
+	ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = ap->a_td;
+	mode = ap->a_mode;
+	error = EACCES;
 
-	/*
-	 * Disallow write attempts on filesystems mounted read-only.
-	 */
-	if ((ap->a_mode & VWRITE) && 
+	if ((mode & VWRITE) &&
 	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (ap->a_vp->v_type) {
-		case VREG: 
+		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
@@ -830,1177 +674,1658 @@
 		}
 	}
 
-	if ((vp = union_lock_upper(un, td)) != NULLVP) {
-		ap->a_vp = vp;
-		error = VOP_ACCESS_AP(ap);
-		union_unlock_upper(vp, td);
-		return(error);
-	}
-
-	if ((vp = un->un_lowervp) != NULLVP) {
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
-		ap->a_vp = vp;
-
-		/*
-		 * Remove VWRITE from a_mode if our mount point is RW, because
-		 * we want to allow writes and lowervp may be read-only.
-		 */
-		if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
-			ap->a_mode &= ~VWRITE;
+	if (uvp != NULLVP) {
+		error = VOP_ACCESS(uvp, mode, ap->a_cred, td);
 
-		error = VOP_ACCESS_AP(ap);
-		if (error == 0) {
-			struct union_mount *um;
+		UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error);
 
-			um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);
+		return (error);
+	}
 
-			if (um->um_op == UNMNT_BELOW) {
-				ap->a_cred = um->um_cred;
-				error = VOP_ACCESS_AP(ap);
+	if (lvp != NULLVP) {
+		if (mode & VWRITE) {
+			if (ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY) {
+				switch (ap->a_vp->v_type) {
+				case VREG:
+				case VDIR:
+				case VLNK:
+					return (EROFS);
+				default:
+					break;
+				}
+			} else if (ap->a_vp->v_type == VREG || ap->a_vp->v_type == VDIR) {
+				/* check shadow file/dir */
+				if (ump->um_copymode != UNIONFS_TRANSPARENT) {
+					error = unionfs_create_uppervattr(ump,
+					    lvp, &va, ap->a_cred, td);
+					if (error != 0)
+						return (error);
+
+					error = unionfs_check_corrected_access(
+					    mode, &va, ap->a_cred);
+					if (error != 0)
+						return (error);
+				}
 			}
+			mode &= ~VWRITE;
+			mode |= VREAD; /* will copy to upper */
 		}
-		VOP_UNLOCK(vp, 0, td);
+		error = VOP_ACCESS(lvp, mode, ap->a_cred, td);
 	}
-	return(error);
-}
 
-/*
- * We handle getattr only to change the fsid and
- * track object sizes
- *
- * It's not clear whether VOP_GETATTR is to be
- * called with the vnode locked or not.  stat() calls
- * it with (vp) locked, and fstat() calls it with
- * (vp) unlocked. 
- *
- * Because of this we cannot use our normal locking functions
- * if we do not intend to lock the main a_vp node.  At the moment
- * we are running without any specific locking at all, but beware
- * to any programmer that care must be taken if locking is added
- * to this function.
- */
+	UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error);
+
+	return (error);
+}
 
 static int
-union_getattr(ap)
-	struct vop_getattr_args /* {
-		struct vnode *a_vp;
-		struct vattr *a_vap;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_getattr(struct vop_getattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct union_mount *um = MOUNTTOUNIONMOUNT(ap->a_vp->v_mount);
-	struct vnode *vp;
-	struct vattr *vap;
-	struct vattr va;
+	int		error;
+	struct unionfs_node *unp;
+	struct unionfs_mount *ump;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct thread  *td;
+	struct vattr	va;
 
-	/*
-	 * Some programs walk the filesystem hierarchy by counting
-	 * links to directories to avoid stat'ing all the time.
-	 * This means the link count on directories needs to be "correct".
-	 * The only way to do that is to call getattr on both layers
-	 * and fix up the link count.  The link count will not necessarily
-	 * be accurate but will be large enough to defeat the tree walkers.
-	 */
+	UNIONFS_INTERNAL_DEBUG("unionfs_getattr: enter\n");
 
-	vap = ap->a_vap;
+	unp = VTOUNIONFS(ap->a_vp);
+	ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = ap->a_td;
 
-	if ((vp = un->un_uppervp) != NULLVP) {
-		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
-		if (error)
-			return (error);
-		/* XXX isn't this dangerous without a lock? */
-		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
+	if (uvp != NULLVP) {
+		if ((error = VOP_GETATTR(uvp, ap->a_vap, ap->a_cred, td)) == 0)
+			ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+		UNIONFS_INTERNAL_DEBUG("unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n",
+		    ap->a_vap->va_mode, ap->a_vap->va_uid,
+		    ap->a_vap->va_gid, error);
+
+		return (error);
 	}
 
-	if (vp == NULLVP) {
-		vp = un->un_lowervp;
-	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
-		vp = un->un_lowervp;
-		vap = &va;
-	} else {
-		vp = NULLVP;
-	}
-
-	if (vp != NULLVP) {
-		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
-		if (error)
-			return (error);
-		/* XXX isn't this dangerous without a lock? */
-		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
+	error = VOP_GETATTR(lvp, ap->a_vap, ap->a_cred, td);
+
+	if (error == 0 && !(ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY)) {
+		/* correct the attr toward shadow file/dir. */
+		if (ap->a_vp->v_type == VREG || ap->a_vp->v_type == VDIR) {
+			unionfs_create_uppervattr_core(ump, ap->a_vap, &va, td);
+			ap->a_vap->va_mode = va.va_mode;
+			ap->a_vap->va_uid = va.va_uid;
+			ap->a_vap->va_gid = va.va_gid;
+		}
 	}
 
-	if (ap->a_vap->va_fsid == um->um_upperdev)
+	if (error == 0)
 		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
 
-	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
-		ap->a_vap->va_nlink += vap->va_nlink;
-	return (0);
+	UNIONFS_INTERNAL_DEBUG("unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n",
+	    ap->a_vap->va_mode, ap->a_vap->va_uid, ap->a_vap->va_gid, error);
+
+	return (error);
 }
 
 static int
-union_setattr(ap)
-	struct vop_setattr_args /* {
-		struct vnode *a_vp;
-		struct vattr *a_vap;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct thread *td = ap->a_td;
-	struct vattr *vap = ap->a_vap;
-	struct vnode *uppervp;
-	int error;
+unionfs_setattr(struct vop_setattr_args *ap)
+{
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct thread  *td;
+	struct vattr   *vap;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setattr: enter\n");
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = ap->a_td;
+	vap = ap->a_vap;
 
-	/*
-	 * Disallow write attempts on filesystems mounted read-only.
-	 */
 	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
-	     vap->va_mtime.tv_sec != VNOVAL || 
-	     vap->va_mode != (mode_t)VNOVAL)) {
+	     vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL))
 		return (EROFS);
-	}
 
-	/*
-	 * Handle case of truncating lower object to zero size
-	 * by creating a zero length upper object.  This is to
-	 * handle the case of open with O_TRUNC and O_CREAT.
-	 */
-	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
-		error = union_copyup(un, (ap->a_vap->va_size != 0),
-			    ap->a_cred, ap->a_td);
-		if (error)
+	if (uvp == NULLVP && lvp->v_type == VREG) {
+		error = unionfs_copyfile(unp, (vap->va_size != 0),
+		    ap->a_cred, td);
+		if (error != 0)
 			return (error);
+		uvp = unp->un_uppervp;
 	}
 
-	/*
-	 * Try to set attributes in upper layer,
-	 * otherwise return read-only filesystem error.
-	 */
-	error = EROFS;
-	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
-		error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
-					ap->a_cred, ap->a_td);
-		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
-			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
-		union_unlock_upper(uppervp, td);
-	}
+	if (uvp != NULLVP)
+		error = VOP_SETATTR(uvp, vap, ap->a_cred, td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setattr: leave (%d)\n", error);
+
 	return (error);
 }
 
 static int
-union_read(ap)
-	struct vop_read_args /* {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		int  a_ioflag;
-		struct ucred *a_cred;
-	} */ *ap;
+unionfs_read(struct vop_read_args *ap)
+{
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *tvp;
+
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_read: enter\n"); */
+
+	unp = VTOUNIONFS(ap->a_vp);
+	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
+
+	error = VOP_READ(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_read: leave (%d)\n", error); */
+
+	return (error);
+}
+
+static int
+unionfs_write(struct vop_write_args *ap)
+{
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *tvp;
+
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_write: enter\n"); */
+
+	unp = VTOUNIONFS(ap->a_vp);
+	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
+
+	error = VOP_WRITE(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_write: leave (%d)\n", error); */
+
+	return (error);
+}
+
+static int
+unionfs_lease(struct vop_lease_args *ap)
 {
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct thread *td = ap->a_uio->uio_td;
-	struct vnode *uvp;
 	int error;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
 
-	uvp = union_lock_other(un, td);
-	KASSERT(uvp != NULL, ("union_read: backing vnode missing!"));
+	UNIONFS_INTERNAL_DEBUG("unionfs_lease: enter\n");
 
-	error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
-	union_unlock_other(uvp, td);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
-	/*
-	 * XXX
-	 * Perhaps the size of the underlying object has changed under
-	 * our feet.  Take advantage of the offset information present
-	 * in the uio structure.
-	 */
-	if (error == 0) {
-		struct union_node *un = VTOUNION(ap->a_vp);
-		off_t cur = ap->a_uio->uio_offset;
+	error = VOP_LEASE(vp, ap->a_td, ap->a_cred, ap->a_flag);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_lease: leave (%d)\n", error);
 
-		if (uvp == un->un_uppervp) {
-			if (cur > un->un_uppersz)
-				union_newsize(ap->a_vp, cur, VNOVAL);
-		} else {
-			if (cur > un->un_lowersz)
-				union_newsize(ap->a_vp, VNOVAL, cur);
-		}
-	}
 	return (error);
 }
 
 static int
-union_write(ap)
-	struct vop_write_args /* {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		int  a_ioflag;
-		struct ucred *a_cred;
-	} */ *ap;
+unionfs_ioctl(struct vop_ioctl_args *ap)
 {
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct thread *td = ap->a_uio->uio_td;
-	struct vnode *uppervp;
 	int error;
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp;
+	struct vnode   *ovp;
 
-	if ((uppervp = union_lock_upper(un, td)) == NULLVP)
-		panic("union: missing upper layer in write");
+	UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: enter\n");
 
-	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td);
+	unp = VTOUNIONFS(ap->a_vp);
+	unionfs_get_node_status(unp, ap->a_td, &unsp);
+	ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
+	unionfs_tryrem_node_status(unp, ap->a_td, unsp);
+	VOP_UNLOCK(ap->a_vp, 0, ap->a_td);
 
-	/*
-	 * The size of the underlying object may be changed by the
-	 * write.
-	 */
-	if (error == 0) {
-		off_t cur = ap->a_uio->uio_offset;
+	if (ovp == NULLVP)
+		return (EBADF);
+
+	error = VOP_IOCTL(ovp, ap->a_command, ap->a_data, ap->a_fflag,
+	    ap->a_cred, ap->a_td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: leave (%d)\n", error);
 
-		if (cur > un->un_uppersz)
-			union_newsize(ap->a_vp, cur, VNOVAL);
-	}
-	union_unlock_upper(uppervp, td);
 	return (error);
 }
 
 static int
-union_lease(ap)
-	struct vop_lease_args /* {
-		struct vnode *a_vp;
-		struct thread *a_td;
-		struct ucred *a_cred;
-		int a_flag;
-	} */ *ap;
+unionfs_poll(struct vop_poll_args *ap)
 {
-	struct vnode *ovp = OTHERVP(ap->a_vp);
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp;
+	struct vnode   *ovp;
+
+ 	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td);
+	unp = VTOUNIONFS(ap->a_vp);
+	unionfs_get_node_status(unp, ap->a_td, &unsp);
+	ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
+	unionfs_tryrem_node_status(unp, ap->a_td, unsp);
+	VOP_UNLOCK(ap->a_vp, 0, ap->a_td);
+
+	if (ovp == NULLVP)
+		return (EBADF);
 
-	ap->a_vp = ovp;
-	return (VOP_LEASE_AP(ap));
+	return (VOP_POLL(ovp, ap->a_events, ap->a_cred, ap->a_td));
 }
 
 static int
-union_ioctl(ap)
-	struct vop_ioctl_args /* {
-		struct vnode *a_vp;
-		u_long  a_command;
-		caddr_t  a_data;
-		int  a_fflag;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_fsync(struct vop_fsync_args *ap)
 {
-	struct vnode *ovp = OTHERVP(ap->a_vp);
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp;
+	struct vnode   *ovp;
 
-	ap->a_vp = ovp;
-	return (VOP_IOCTL_AP(ap));
+	unp = VTOUNIONFS(ap->a_vp);
+	unionfs_get_node_status(unp, ap->a_td, &unsp);
+	ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
+	unionfs_tryrem_node_status(unp, ap->a_td, unsp);
+
+	if (ovp == NULLVP)
+		return (EBADF);
+
+	return (VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td));
 }
 
 static int
-union_poll(ap)
-	struct vop_poll_args /* {
-		struct vnode *a_vp;
-		int  a_events;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_remove(struct vop_remove_args *ap)
 {
-	struct vnode *ovp = OTHERVP(ap->a_vp);
+	int		error;
+	struct unionfs_node *dunp;
+	struct unionfs_node *unp;
+	struct unionfs_mount *ump;
+	struct vnode   *udvp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct componentname *cnp;
+	struct thread  *td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_remove: enter\n");
+
+	error = 0;
+	dunp = VTOUNIONFS(ap->a_dvp);
+	unp = VTOUNIONFS(ap->a_vp);
+	udvp = dunp->un_uppervp;
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	cnp = ap->a_cnp;
+	td = curthread;
+
+	if (udvp == NULLVP)
+		return (EROFS);
 
-	ap->a_vp = ovp;
-	return (VOP_POLL_AP(ap));
+	if (uvp != NULLVP) {
+		ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
+		if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP)
+			cnp->cn_flags |= DOWHITEOUT;
+		error = VOP_REMOVE(udvp, uvp, cnp);
+	} else if (lvp != NULLVP)
+		error = unionfs_mkwhiteout(udvp, cnp, td, unp->un_path);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error);
+
+	return (error);
 }
 
 static int
-union_fsync(ap)
-	struct vop_fsync_args /* {
-		struct vnode *a_vp;
-		struct ucred *a_cred;
-		int  a_waitfor;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_link(struct vop_link_args *ap)
 {
-	int error = 0;
-	struct thread *td = ap->a_td;
-	struct vnode *targetvp;
-	struct union_node *un = VTOUNION(ap->a_vp);
+	int		error;
+	int		needrelookup;
+	struct unionfs_node *dunp;
+	struct unionfs_node *unp;
+	struct vnode   *udvp;
+	struct vnode   *uvp;
+	struct componentname *cnp;
+	struct thread  *td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_link: enter\n");
 
-	if ((targetvp = union_lock_other(un, td)) != NULLVP) {
-		error = VOP_FSYNC(targetvp, ap->a_waitfor, td);
-		union_unlock_other(targetvp, td);
+	error = 0;
+	needrelookup = 0;
+	dunp = VTOUNIONFS(ap->a_tdvp);
+	unp = NULL;
+	udvp = dunp->un_uppervp;
+	uvp = NULLVP;
+	cnp = ap->a_cnp;
+	td = curthread;
+
+	if (udvp == NULLVP)
+		return (EROFS);
+
+	if (ap->a_vp->v_op != &unionfs_vnodeops)
+		uvp = ap->a_vp;
+	else {
+		unp = VTOUNIONFS(ap->a_vp);
+
+		if (unp->un_uppervp == NULLVP) {
+			if (ap->a_vp->v_type != VREG)
+				return (EOPNOTSUPP);
+
+			error = unionfs_copyfile(unp, 1, cnp->cn_cred, td);
+			if (error != 0)
+				return (error);
+			needrelookup = 1;
+		}
+		uvp = unp->un_uppervp;
 	}
 
+	if (needrelookup != 0)
+		error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td);
+
+	if (error == 0)
+		error = VOP_LINK(udvp, uvp, cnp);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_link: leave (%d)\n", error);
+
 	return (error);
 }
 
-/*
- *	union_remove:
- *
- *	Remove the specified cnp.  The dvp and vp are passed to us locked
- *	and must remain locked on return.
- */
-
 static int
-union_remove(ap)
-	struct vop_remove_args /* {
-		struct vnode *a_dvp;
-		struct vnode *a_vp;
-		struct componentname *a_cnp;
-	} */ *ap;
-{
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *uppervp;
-	struct vnode *upperdvp;
-	int error;
+unionfs_rename(struct vop_rename_args *ap)
+{
+	int		error;
+	struct vnode   *fdvp;
+	struct vnode   *fvp;
+	struct componentname *fcnp;
+	struct vnode   *tdvp;
+	struct vnode   *tvp;
+	struct componentname *tcnp;
+	struct vnode   *ltdvp;
+	struct vnode   *ltvp;
+	struct thread  *td;
+
+	/* rename target vnodes */
+	struct vnode   *rfdvp;
+	struct vnode   *rfvp;
+	struct vnode   *rtdvp;
+	struct vnode   *rtvp;
+
+	int		needrelookup;
+	struct unionfs_mount *ump;
+	struct unionfs_node *unp;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_rename: enter\n");
+
+	error = 0;
+	fdvp = ap->a_fdvp;
+	fvp = ap->a_fvp;
+	fcnp = ap->a_fcnp;
+	tdvp = ap->a_tdvp;
+	tvp = ap->a_tvp;
+	tcnp = ap->a_tcnp;
+	ltdvp = NULLVP;
+	ltvp = NULLVP;
+	td = curthread;
+	rfdvp = fdvp;
+	rfvp = fvp;
+	rtdvp = tdvp;
+	rtvp = tvp;
+	needrelookup = 0;
 
-	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
-		panic("union remove: null upper vnode");
+#ifdef DIAGNOSTIC
+	if (!(fcnp->cn_flags & HASBUF) || !(tcnp->cn_flags & HASBUF))
+		panic("unionfs_rename: no name");
+#endif
 
-	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
-		if (union_dowhiteout(un, cnp->cn_cred, td))
-			cnp->cn_flags |= DOWHITEOUT;
-		if (cnp->cn_flags & DOWHITEOUT)		/* XXX fs corruption */
-			error = EOPNOTSUPP;
+	/* check for cross device rename */
+	if (fvp->v_mount != tdvp->v_mount ||
+	    (tvp != NULLVP && fvp->v_mount != tvp->v_mount)) {
+		error = EXDEV;
+		goto unionfs_rename_abort;
+	}
+
+	/* Renaming a file to itself has no effect. */
+	if (fvp == tvp)
+		goto unionfs_rename_abort;
+
+	/*
+	 * from/to vnode is unionfs node.
+	 */
+
+	unp = VTOUNIONFS(fdvp);
+#ifdef UNIONFS_IDBG_RENAME
+	UNIONFS_INTERNAL_DEBUG("fdvp=%p, ufdvp=%p, lfdvp=%p\n", fdvp, unp->un_uppervp, unp->un_lowervp);
+#endif
+	if (unp->un_uppervp == NULLVP) {
+		error = ENODEV;
+		goto unionfs_rename_abort;
+	}
+	rfdvp = unp->un_uppervp;
+	vref(rfdvp);
+
+	unp = VTOUNIONFS(fvp);
+#ifdef UNIONFS_IDBG_RENAME
+	UNIONFS_INTERNAL_DEBUG("fvp=%p, ufvp=%p, lfvp=%p\n", fvp, unp->un_uppervp, unp->un_lowervp);
+#endif
+	ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount);
+	if (unp->un_uppervp == NULLVP) {
+		switch (fvp->v_type) {
+		case VREG:
+			if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0)
+				goto unionfs_rename_abort;
+			error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
+			VOP_UNLOCK(fvp, 0, td);
+			if (error != 0)
+				goto unionfs_rename_abort;
+			break;
+		case VDIR:
+			if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0)
+				goto unionfs_rename_abort;
+			error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
+			VOP_UNLOCK(fvp, 0, td);
+			if (error != 0)
+				goto unionfs_rename_abort;
+			break;
+		default:
+			error = ENODEV;
+			goto unionfs_rename_abort;
+		}
+
+		needrelookup = 1;
+	}
+
+	if (unp->un_lowervp != NULLVP)
+		fcnp->cn_flags |= DOWHITEOUT;
+	rfvp = unp->un_uppervp;
+	vref(rfvp);
+
+	unp = VTOUNIONFS(tdvp);
+#ifdef UNIONFS_IDBG_RENAME
+	UNIONFS_INTERNAL_DEBUG("tdvp=%p, utdvp=%p, ltdvp=%p\n", tdvp, unp->un_uppervp, unp->un_lowervp);
+#endif
+	if (unp->un_uppervp == NULLVP) {
+		error = ENODEV;
+		goto unionfs_rename_abort;
+	}
+	rtdvp = unp->un_uppervp;
+	ltdvp = unp->un_lowervp;
+	vref(rtdvp);
+
+	if (tdvp == tvp) {
+		rtvp = rtdvp;
+		vref(rtvp);
+	} else if (tvp != NULLVP) {
+		unp = VTOUNIONFS(tvp);
+#ifdef UNIONFS_IDBG_RENAME
+		UNIONFS_INTERNAL_DEBUG("tvp=%p, utvp=%p, ltvp=%p\n", tvp, unp->un_uppervp, unp->un_lowervp);
+#endif
+		if (unp->un_uppervp == NULLVP)
+			rtvp = NULLVP;
+		else {
+			if (tvp->v_type == VDIR) {
+				error = EINVAL;
+				goto unionfs_rename_abort;
+			}
+			rtvp = unp->un_uppervp;
+			ltvp = unp->un_lowervp;
+			vref(rtvp);
+		}
+	}
+
+	if (needrelookup != 0) {
+		if ((error = vn_lock(fdvp, LK_EXCLUSIVE, td)) != 0)
+			goto unionfs_rename_abort;
+		error = unionfs_relookup_for_delete(fdvp, fcnp, td);
+		VOP_UNLOCK(fdvp, 0, td);
+		if (error != 0)
+			goto unionfs_rename_abort;
+
+		/* Unlock tvp in order to avoid a recursive lock. */
+		if (tvp != NULLVP && tvp != tdvp)
+			VOP_UNLOCK(tvp, 0, td);
+		error = unionfs_relookup_for_rename(tdvp, tcnp, td);
+		if (tvp != NULLVP && tvp != tdvp)
+			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
+		if (error != 0)
+			goto unionfs_rename_abort;
+	}
+
+	error = VOP_RENAME(rfdvp, rfvp, fcnp, rtdvp, rtvp, tcnp);
+
+	if (error == 0) {
+		if (rtvp != NULLVP && rtvp->v_type == VDIR)
+			cache_purge(tdvp);
+		if (fvp->v_type == VDIR && fdvp != tdvp)
+			cache_purge(fdvp);
+	}
+
+	if (fdvp != rfdvp)
+		vrele(fdvp);
+	if (fvp != rfvp)
+		vrele(fvp);
+	if (ltdvp != NULLVP)
+		VOP_UNLOCK(ltdvp, 0, td);
+	if (tdvp != rtdvp)
+		vrele(tdvp);
+	if (ltvp != NULLVP)
+		VOP_UNLOCK(ltvp, 0, td);
+	if (tvp != rtvp && tvp != NULLVP) {
+		if (rtvp == NULLVP)
+			vput(tvp);
 		else
-			error = VOP_REMOVE(upperdvp, uppervp, cnp);
-		if (!error)
-			union_removed_upper(un);
-		union_unlock_upper(uppervp, td);
-	} else {
-		error = union_mkwhiteout(
-			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
-			    upperdvp, ap->a_cnp, un->un_path);
+			vrele(tvp);
 	}
-	union_unlock_upper(upperdvp, td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error);
+
 	return (error);
-}
 
-/*
- *	union_link:
- *
- *	tdvp and vp will be locked on entry.
- *	tdvp and vp should remain locked on return.
- */
+unionfs_rename_abort:
+	if (fdvp != rfdvp)
+		vrele(rfdvp);
+	if (fvp != rfvp)
+		vrele(rfvp);
+	if (tdvp != rtdvp)
+		vrele(rtdvp);
+	vput(tdvp);
+	if (tvp != rtvp && rtvp != NULLVP)
+		vrele(rtvp);
+	if (tvp != NULLVP) {
+		if (tdvp != tvp)
+			vput(tvp);
+		else
+			vrele(tvp);
+	}
+	vrele(fdvp);
+	vrele(fvp);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error);
+
+	return (error);
+}
 
 static int
-union_link(ap)
-	struct vop_link_args /* {
-		struct vnode *a_tdvp;
-		struct vnode *a_vp;
-		struct componentname *a_cnp;
-	} */ *ap;
-{
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct union_node *dun = VTOUNION(ap->a_tdvp);
-	struct vnode *vp;
-	struct vnode *tdvp;
-	int error = 0;
-
-	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
-		vp = ap->a_vp;
-	} else {
-		struct union_node *tun = VTOUNION(ap->a_vp);
+unionfs_mkdir(struct vop_mkdir_args *ap)
+{
+	int		error;
+	int		lkflags;
+	struct unionfs_node *dunp;
+	struct componentname *cnp;
+	struct thread  *td;
+	struct vnode   *udvp;
+	struct vnode   *uvp;
+	struct vattr	va;
 
-		if (tun->un_uppervp == NULLVP) {
-#if 0
-			if (dun->un_uppervp == tun->un_dirvp) {
-				if (dun->un_flags & UN_ULOCK) {
-					dun->un_flags &= ~UN_ULOCK;
-					VOP_UNLOCK(dun->un_uppervp, 0, td);
-				}
-			}
-#endif
-			error = union_copyup(tun, 1, cnp->cn_cred, td);
-#if 0
-			if (dun->un_uppervp == tun->un_dirvp) {
-				vn_lock(dun->un_uppervp,
-					    LK_EXCLUSIVE | LK_RETRY, td);
-				dun->un_flags |= UN_ULOCK;
-			}
-#endif
-			if (error)
+	UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: enter\n");
+
+	error = EROFS;
+	dunp = VTOUNIONFS(ap->a_dvp);
+	cnp = ap->a_cnp;
+	lkflags = cnp->cn_lkflags;
+	td = curthread;
+	udvp = dunp->un_uppervp;
+
+	if (udvp != NULLVP) {
+		/* check opaque */
+		if (!(cnp->cn_flags & ISWHITEOUT)) {
+			error = VOP_GETATTR(udvp, &va, cnp->cn_cred, td);
+			if (error != 0)
 				return (error);
+			if (va.va_flags & OPAQUE) 
+				cnp->cn_flags |= ISWHITEOUT;
+		}
+
+		if ((error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap)) == 0) {
+			VOP_UNLOCK(uvp, 0, td);
+			cnp->cn_lkflags = LK_EXCLUSIVE;
+			error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP,
+			    ap->a_dvp, ap->a_vpp, cnp, td);
+			cnp->cn_lkflags = lkflags;
+			vrele(uvp);
 		}
-		vp = tun->un_uppervp;
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 	}
 
-	/*
-	 * Make sure upper is locked, then unlock the union directory we were 
-	 * called with to avoid a deadlock while we are calling VOP_LINK() on 
-	 * the upper (with tdvp locked and vp not locked).  Our ap->a_tdvp
-	 * is expected to be locked on return.
-	 */
+	UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: leave (%d)\n", error);
+
+	return (error);
+}
 
-	if ((tdvp = union_lock_upper(dun, td)) == NULLVP)
+static int
+unionfs_rmdir(struct vop_rmdir_args *ap)
+{
+	int		error;
+	struct unionfs_node *dunp;
+	struct unionfs_node *unp;
+	struct unionfs_mount *ump;
+	struct componentname *cnp;
+	struct thread  *td;
+	struct vnode   *udvp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: enter\n");
+
+	error = 0;
+	dunp = VTOUNIONFS(ap->a_dvp);
+	unp = VTOUNIONFS(ap->a_vp);
+	cnp = ap->a_cnp;
+	td = curthread;
+	udvp = dunp->un_uppervp;
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+
+	if (udvp == NULLVP)
 		return (EROFS);
 
-	VOP_UNLOCK(ap->a_tdvp, 0, td);		/* unlock calling node */
-	error = VOP_LINK(tdvp, vp, cnp);	/* call link on upper */
+	if (udvp == uvp)
+		return (EOPNOTSUPP);
+
+	if (uvp != NULLVP) {
+		if (lvp != NULLVP) {
+			error = unionfs_check_rmdir(ap->a_vp, cnp->cn_cred, td);
+			if (error != 0)
+				return (error);
+		}
+		ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
+		if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP)
+			cnp->cn_flags |= DOWHITEOUT;
+		error = VOP_RMDIR(udvp, uvp, cnp);
+	}
+	else if (lvp != NULLVP)
+		error = unionfs_mkwhiteout(udvp, cnp, td, unp->un_path);
+
+	if (error == 0) {
+		cache_purge(ap->a_dvp);
+		cache_purge(ap->a_vp);
+	}
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: leave (%d)\n", error);
+
+	return (error);
+}
+
+static int
+unionfs_symlink(struct vop_symlink_args *ap)
+{
+	int		error;
+	int		lkflags;
+	struct unionfs_node *dunp;
+	struct componentname *cnp;
+	struct thread  *td;
+	struct vnode   *udvp;
+	struct vnode   *uvp;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_symlink: enter\n");
+
+	error = EROFS;
+	dunp = VTOUNIONFS(ap->a_dvp);
+	cnp = ap->a_cnp;
+	lkflags = cnp->cn_lkflags;
+	td = curthread;
+	udvp = dunp->un_uppervp;
+
+	if (udvp != NULLVP) {
+		error = VOP_SYMLINK(udvp, &uvp, cnp, ap->a_vap, ap->a_target);
+		if (error == 0) {
+			VOP_UNLOCK(uvp, 0, td);
+			cnp->cn_lkflags = LK_EXCLUSIVE;
+			error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP,
+			    ap->a_dvp, ap->a_vpp, cnp, td);
+			cnp->cn_lkflags = lkflags;
+			vrele(uvp);
+		}
+	}
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_symlink: leave (%d)\n", error);
 
-	/*
-	 * Unlock tun->un_uppervp if we locked it above.
-	 */
-	if (ap->a_tdvp->v_op == ap->a_vp->v_op)
-		VOP_UNLOCK(vp, 0, td);
-	/*
-	 * We have to unlock tdvp prior to relocking our calling node in
-	 * order to avoid a deadlock.  We also have to unlock ap->a_vp
-	 * before relocking the directory, but then we have to relock
-	 * ap->a_vp as our caller expects.
-	 */
-	VOP_UNLOCK(ap->a_vp, 0, td);
-	union_unlock_upper(tdvp, td);
-	vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, td);
-	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td);
 	return (error);
 }
 
 static int
-union_rename(ap)
-	struct vop_rename_args  /* {
-		struct vnode *a_fdvp;
-		struct vnode *a_fvp;
-		struct componentname *a_fcnp;
-		struct vnode *a_tdvp;
-		struct vnode *a_tvp;
-		struct componentname *a_tcnp;
-	} */ *ap;
+unionfs_readdir(struct vop_readdir_args *ap)
 {
-	int error;
-	struct vnode *fdvp = ap->a_fdvp;
-	struct vnode *fvp = ap->a_fvp;
-	struct vnode *tdvp = ap->a_tdvp;
-	struct vnode *tvp = ap->a_tvp;
+	int		error;
+	int		eofflag;
+	int		locked;
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp;
+	struct uio     *uio;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct thread  *td;
+	struct vattr    va;
 
-	/*
-	 * Figure out what fdvp to pass to our upper or lower vnode.  If we
-	 * replace the fdvp, release the original one and ref the new one.
-	 */
+	int		ncookies_bk;
+	u_long         *cookies_bk;
 
-	if (fdvp->v_op == &union_vnodeops) {	/* always true */
-		struct union_node *un = VTOUNION(fdvp);
-		if (un->un_uppervp == NULLVP) {
-			/*
-			 * this should never happen in normal
-			 * operation but might if there was
-			 * a problem creating the top-level shadow
-			 * directory.
-			 */
-			error = EXDEV;
-			goto bad;
-		}
-		fdvp = un->un_uppervp;
-		VREF(fdvp);
-		vrele(ap->a_fdvp);
+	UNIONFS_INTERNAL_DEBUG("unionfs_readdir: enter\n");
+
+	error = 0;
+	eofflag = 0;
+	locked = 0;
+	unp = VTOUNIONFS(ap->a_vp);
+	uio = ap->a_uio;
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = uio->uio_td;
+	ncookies_bk = 0;
+	cookies_bk = NULL;
+
+	if (ap->a_vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	/* an OPAQUE upper directory hides the lower one: treat as upper only */
+	if (uvp != NULLVP && lvp != NULLVP) {
+		if ((error = VOP_GETATTR(uvp, &va, ap->a_cred, td)) != 0)
+			goto unionfs_readdir_exit;
+		if (va.va_flags & OPAQUE)
+			lvp = NULLVP;
 	}
 
-	/*
-	 * Figure out what fvp to pass to our upper or lower vnode.  If we
-	 * replace the fvp, release the original one and ref the new one.
-	 */
+	/* check the open count: each layer we read must be open, else EBADF. */
+	if (VOP_ISLOCKED(ap->a_vp, td) != LK_EXCLUSIVE) {
+		vn_lock(ap->a_vp, LK_UPGRADE | LK_RETRY, td);
+		locked = 1;
+	}
+	unionfs_get_node_status(unp, td, &unsp);
+	if ((uvp != NULLVP && unsp->uns_upper_opencnt <= 0) ||
+	    (lvp != NULLVP && unsp->uns_lower_opencnt <= 0)) {
+		unionfs_tryrem_node_status(unp, td, unsp);
+		error = EBADF;
+	}
+	if (locked == 1)
+		vn_lock(ap->a_vp, LK_DOWNGRADE | LK_RETRY, td);
+	if (error != 0)
+		goto unionfs_readdir_exit;
 
-	if (fvp->v_op == &union_vnodeops) {	/* always true */
-		struct union_node *un = VTOUNION(fvp);
-#if 0
-		struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
-#endif
+	/* upper only */
+	if (uvp != NULLVP && lvp == NULLVP) {
+		error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag,
+		    ap->a_ncookies, ap->a_cookies);
+		unsp->uns_readdir_status = 0;
 
-		if (un->un_uppervp == NULLVP) {
-			switch(fvp->v_type) {
-			case VREG:
-				vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
-				error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_thread);
-				VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_thread);
-				if (error)
-					goto bad;
-				break;
-			case VDIR:
-				/*
-				 * XXX not yet.
-				 *
-				 * There is only one way to rename a directory
-				 * based in the lowervp, and that is to copy
-				 * the entire directory hierarchy.  Otherwise
-				 * it would not last across a reboot.
-				 */
-#if 0
-				vrele(fvp);
-				fvp = NULL;
-				vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
-				error = union_mkshadow(um, fdvp, 
-					    ap->a_fcnp, &un->un_uppervp);
-				VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_thread);
-				if (un->un_uppervp)
-					VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_thread);
-				if (error)
-					goto bad;
-				break;
-#endif
-			default:
-				error = EXDEV;
-				goto bad;
-			}
-		}
+		goto unionfs_readdir_exit;
+	}
 
-		if (un->un_lowervp != NULLVP)
-			ap->a_fcnp->cn_flags |= DOWHITEOUT;
-		fvp = un->un_uppervp;
-		VREF(fvp);
-		vrele(ap->a_fvp);
+	/* lower only */
+	if (uvp == NULLVP && lvp != NULLVP) {
+		error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
+		    ap->a_ncookies, ap->a_cookies);
+		unsp->uns_readdir_status = 2;
+
+		goto unionfs_readdir_exit;
 	}
 
 	/*
-	 * Figure out what tdvp (destination directory) to pass to the
-	 * lower level.  If we replace it with uppervp, we need to vput the 
-	 * old one.  The exclusive lock is transfered to what we will pass
-	 * down in the VOP_RENAME() and we replace uppervp with a simple
-	 * reference.
+	 * both layers: read upper until EOF, then restart at offset 0 in lower
 	 */
+	KASSERT(uvp != NULLVP, ("unionfs_readdir: null upper vp"));
+	KASSERT(lvp != NULLVP, ("unionfs_readdir: null lower vp"));
+	if (uio->uio_offset == 0)
+		unsp->uns_readdir_status = 0;
 
-	if (tdvp->v_op == &union_vnodeops) {
-		struct union_node *un = VTOUNION(tdvp);
+	if (unsp->uns_readdir_status == 0) {
+		/* read upper */
+		error = VOP_READDIR(uvp, uio, ap->a_cred, &eofflag,
+				    ap->a_ncookies, ap->a_cookies);
 
-		if (un->un_uppervp == NULLVP) {
-			/*
-			 * This should never happen in normal
-			 * operation but might if there was
-			 * a problem creating the top-level shadow
-			 * directory.
-			 */
-			error = EXDEV;
-			goto bad;
-		}
+		if (error != 0 || eofflag == 0)
+			goto unionfs_readdir_exit;
+		unsp->uns_readdir_status = 1;
 
 		/*
-		 * New tdvp is a lock and reference on uppervp.
-		 * Put away the old tdvp.
+		 * ufs(and other fs) needs size of uio_resid larger than
+		 * DIRBLKSIZ.
+		 * size of DIRBLKSIZ equals DEV_BSIZE.
+		 * (see: ufs/ufs/ufs_vnops.c ufs_readdir func , ufs/ufs/dir.h)
 		 */
-		tdvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
-		vput(ap->a_tdvp);
-	}
-
-	/*
-	 * Figure out what tvp (destination file) to pass to the
-	 * lower level.
-	 *
-	 * If the uppervp file does not exist, put away the (wrong)
-	 * file and change tvp to NULL.
-	 */
+		if (uio->uio_resid <= (uio->uio_resid & (DEV_BSIZE -1)))
+			goto unionfs_readdir_exit;
 
-	if (tvp != NULLVP && tvp->v_op == &union_vnodeops) {
-		struct union_node *un = VTOUNION(tvp);
+		/*
+		 * Stash the upper cookies and clear the caller's slots before
+		 * the lower readdir; the two sets are merged after that read.
+		 */
+		if (ap->a_ncookies != NULL) {
+			ncookies_bk = *(ap->a_ncookies);
+			*(ap->a_ncookies) = 0;
+		}
+		if (ap->a_cookies != NULL) {
+			cookies_bk = *(ap->a_cookies);
+			*(ap->a_cookies) = NULL;
+		}
+	}
 
-		tvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
-		vput(ap->a_tvp);
-		/* note: tvp may be NULL */
+	/* initialize for readdir in lower */
+	if (unsp->uns_readdir_status == 1) {
+		unsp->uns_readdir_status = 2;
+		uio->uio_offset = 0;
 	}
 
-	/*
-	 * VOP_RENAME() releases/vputs prior to returning, so we have no
-	 * cleanup to do.
-	 */
+	if (lvp == NULLVP) {
+		error = EBADF;
+		goto unionfs_readdir_exit;
+	}
+	/* read lower */
+	error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
+			    ap->a_ncookies, ap->a_cookies);
 
-	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+	if (cookies_bk != NULL) {
+		/* merge cookies: saved upper entries first, then lower */
+		int		size;
+		u_long         *newcookies, *pos;
 
-	/*
-	 * Error.  We still have to release / vput the various elements.
-	 */
+		size = *(ap->a_ncookies) + ncookies_bk;
+		newcookies = (u_long *) malloc(size * sizeof(u_long),
+		    M_TEMP, M_WAITOK);
+		pos = newcookies;
 
-bad:
-	vrele(fdvp);
-	if (fvp)
-		vrele(fvp);
-	vput(tdvp);
-	if (tvp != NULLVP) {
-		if (tvp != tdvp)
-			vput(tvp);
-		else
-			vrele(tvp);
+		memcpy(pos, cookies_bk, ncookies_bk * sizeof(u_long));
+		pos += ncookies_bk;	/* u_long *: advance in elements, not bytes */
+		memcpy(pos, *(ap->a_cookies), *(ap->a_ncookies) * sizeof(u_long));
+		free(cookies_bk, M_TEMP);
+		free(*(ap->a_cookies), M_TEMP);
+		*(ap->a_ncookies) = size;
+		*(ap->a_cookies) = newcookies;
 	}
+
+unionfs_readdir_exit:
+	if (error != 0 && ap->a_eofflag != NULL)
+		*(ap->a_eofflag) = 1;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error);
+
 	return (error);
 }
 
 static int
-union_mkdir(ap)
-	struct vop_mkdir_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vattr *a_vap;
-	} */ *ap;
-{
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *upperdvp;
-	int error = EROFS;
+unionfs_readlink(struct vop_readlink_args *ap)
+{
+	int error;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
 
-	if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
-		struct vnode *vp;
+	UNIONFS_INTERNAL_DEBUG("unionfs_readlink: enter\n");
 
-		error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
-		union_unlock_upper(upperdvp, td);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
+
+	error = VOP_READLINK(vp, ap->a_uio, ap->a_cred);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_readlink: leave (%d)\n", error);
 
-		if (error == 0) {
-			VOP_UNLOCK(vp, 0, td);
-			UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vrefcnt(vp)));
-			error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
-				ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
-			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
-		}
-	}
 	return (error);
 }
 
 static int
-union_rmdir(ap)
-	struct vop_rmdir_args /* {
-		struct vnode *a_dvp;
-		struct vnode *a_vp;
-		struct componentname *a_cnp;
-	} */ *ap;
+unionfs_getwritemount(struct vop_getwritemount_args *ap)
 {
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *upperdvp;
-	struct vnode *uppervp;
-	int error;
+	int		error;
+	struct vnode   *uvp;
+	struct vnode   *vp;
 
-	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
-		panic("union rmdir: null upper vnode");
+	UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: enter\n");
 
-	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
-		if (union_dowhiteout(un, cnp->cn_cred, td))
-			cnp->cn_flags |= DOWHITEOUT;
-		if (cnp->cn_flags & DOWHITEOUT)		/* XXX fs corruption */
+	error = 0;
+	vp = ap->a_vp;
+
+	if (vp == NULLVP || (vp->v_mount->mnt_flag & MNT_RDONLY))
+		return (EACCES);
+
+	uvp = UNIONFSVPTOUPPERVP(vp);
+	if (uvp == NULLVP && VREG == vp->v_type)
+		uvp = UNIONFSVPTOUPPERVP(VTOUNIONFS(vp)->un_dvp);
+
+	if (uvp != NULLVP)
+		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp);
+	else {
+		VI_LOCK(vp);
+		if (vp->v_iflag & VI_FREE)
 			error = EOPNOTSUPP;
 		else
-			error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
-		if (!error)
-			union_removed_upper(un);
-		union_unlock_upper(uppervp, td);
-	} else {
-		error = union_mkwhiteout(
-			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
-			    dun->un_uppervp, ap->a_cnp, un->un_path);
+			error = EACCES;
+		VI_UNLOCK(vp);
 	}
-	union_unlock_upper(upperdvp, td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: leave (%d)\n", error);
+
 	return (error);
 }
 
-/*
- *	union_symlink:
- *
- *	dvp is locked on entry and remains locked on return.  a_vpp is garbage
- *	(unused).
- */
-
 static int
-union_symlink(ap)
-	struct vop_symlink_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vattr *a_vap;
-		char *a_target;
-	} */ *ap;
-{
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *dvp;
-	int error = EROFS;
-
-	if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
-		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
-			    ap->a_target);
-		union_unlock_upper(dvp, td);
-	}
-	return (error);
+unionfs_inactive(struct vop_inactive_args *ap)
+{
+	ap->a_vp->v_object = NULL;
+	vrecycle(ap->a_vp, ap->a_td);
+	return (0);
 }
 
-/*
- * union_readdir ()works in concert with getdirentries() and
- * readdir(3) to provide a list of entries in the unioned
- * directories.  getdirentries()  is responsible for walking
- * down the union stack.  readdir(3) is responsible for
- * eliminating duplicate names from the returned data stream.
- */
 static int
-union_readdir(ap)
-	struct vop_readdir_args /* {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		struct ucred *a_cred;
-		int *a_eofflag;
-		u_long *a_cookies;
-		int a_ncookies;
-	} */ *ap;
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct thread *td = ap->a_uio->uio_td;
-	struct vnode *uvp;
-	int error = 0;
-
-	if ((uvp = union_lock_upper(un, td)) != NULLVP) {
-		ap->a_vp = uvp;
-		error = VOP_READDIR_AP(ap);
-		union_unlock_upper(uvp, td);
-	}
-	return(error);
+unionfs_reclaim(struct vop_reclaim_args *ap)
+{
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: enter\n"); */
+
+	unionfs_noderem(ap->a_vp, ap->a_td);
+
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: leave\n"); */
+
+	return (0);
 }
 
 static int
-union_readlink(ap)
-	struct vop_readlink_args /* {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		struct ucred *a_cred;
-	} */ *ap;
+unionfs_print(struct vop_print_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct uio *uio = ap->a_uio;
-	struct thread *td = uio->uio_td;
-	struct vnode *vp;
+	struct unionfs_node *unp;
+	/* struct unionfs_node_status *unsp; */
+
+	unp = VTOUNIONFS(ap->a_vp);
+	/* unionfs_get_node_status(unp, curthread, &unsp); */
 
-	vp = union_lock_other(un, td);
-	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
+	printf("unionfs_vp=%p, uppervp=%p, lowervp=%p\n",
+	    ap->a_vp, unp->un_uppervp, unp->un_lowervp);
+	/*
+	printf("unionfs opencnt: uppervp=%d, lowervp=%d\n",
+	    unsp->uns_upper_opencnt, unsp->uns_lower_opencnt);
+	*/
 
-	ap->a_vp = vp;
-	error = VOP_READLINK_AP(ap);
-	union_unlock_other(vp, td);
+	if (unp->un_uppervp != NULLVP)
+		vprint("unionfs: upper", unp->un_uppervp);
+	if (unp->un_lowervp != NULLVP)
+		vprint("unionfs: lower", unp->un_lowervp);
 
-	return (error);
+	return (0);
 }
 
 static int
-union_getwritemount(ap)
-	struct vop_getwritemount_args /* {
-		struct vnode *a_vp;
-		struct mount **a_mpp;
-	} */ *ap;
+unionfs_get_llt_revlock(int flags)
 {
-	struct vnode *vp = ap->a_vp;
-	struct vnode *uvp = UPPERVP(vp);
+	int count;
 
-	if (uvp == NULL) {
-		VI_LOCK(vp);
-		if (vp->v_iflag & VI_FREE) {
-			VI_UNLOCK(vp);
-			return (EOPNOTSUPP);
+	flags &= LK_TYPE_MASK;
+	for (count = 0; un_llt[count].lock != 0; count++) {
+		if (flags == un_llt[count].lock) {
+			return un_llt[count].revlock;
 		}
-		VI_UNLOCK(vp);
-		return (EACCES);
 	}
-	return(VOP_GETWRITEMOUNT(uvp, ap->a_mpp));
-}
 
-/*
- *	union_inactive:
- *
- *	Called with the vnode locked.  We are expected to unlock the vnode.
- */
+	return 0;
+}
 
 static int
-union_inactive(ap)
-	struct vop_inactive_args /* {
-		struct vnode *a_vp;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_lock(struct vop_lock1_args *ap)
 {
-	struct vnode *vp = ap->a_vp;
-	struct union_node *un = VTOUNION(vp);
+	int		error;
+	int		flags;
+	int		revlock;
+	int		uhold;
+	struct mount   *mp;
+	struct unionfs_mount *ump;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct thread  *td;
 
-	/*
-	 * Do nothing (and _don't_ bypass).
-	 * Wait to vrele lowervp until reclaim,
-	 * so that until then our union_node is in the
-	 * cache and reusable.
-	 *
-	 */
+	error = 0;
+	uhold = 0;
+	flags = ap->a_flags;
+	vp = ap->a_vp;
+	td = ap->a_td;
 
-	if (un->un_dircache != NULL)
-		union_dircache_free(un);
+	if (LK_RELEASE == (flags & LK_TYPE_MASK) || !(flags & LK_TYPE_MASK))
+		return (VOP_UNLOCK(vp, flags, td));
 
-#if 0
-	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
-		un->un_flags &= ~UN_ULOCK;
-		VOP_UNLOCK(un->un_uppervp, 0, td);
+	if ((revlock = unionfs_get_llt_revlock(flags)) == 0)
+		panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK);
+
+	if ((flags & LK_INTERLOCK) == 0)
+		VI_LOCK(vp);
+
+	mp = vp->v_mount;
+	if (mp == NULL)
+		goto unionfs_lock_null_vnode;
+
+	ump = MOUNTTOUNIONFSMOUNT(mp);
+	unp = VTOUNIONFS(vp);
+	if (ump == NULL || unp == NULL)
+		goto unionfs_lock_null_vnode;
+	lvp = unp->un_lowervp;
+	uvp = unp->un_uppervp;
+
+	if ((mp->mnt_kern_flag & MNTK_MPSAFE) != 0 &&
+	    (vp->v_iflag & VI_OWEINACT) != 0)
+		flags |= LK_NOWAIT;
+
+	/*
+	 * Sometimes, lower or upper is already exclusive locked.
+	 * (ex. vfs_domount: mounted vnode is already locked.)
+	 */
+	if ((flags & LK_TYPE_MASK) == LK_EXCLUSIVE &&
+	    vp == ump->um_rootvp)
+		flags |= LK_CANRECURSE;
+
+	if (lvp != NULLVP) {
+		VI_LOCK_FLAGS(lvp, MTX_DUPOK);
+		flags |= LK_INTERLOCK;
+		vholdl(lvp);
+
+		VI_UNLOCK(vp);
+		ap->a_flags &= ~LK_INTERLOCK;
+
+		error = VOP_LOCK(lvp, flags, td);
+
+		VI_LOCK(vp);
+		unp = VTOUNIONFS(vp);
+		if (unp == NULL) {
+			VI_UNLOCK(vp);
+			if (error == 0)
+				VOP_UNLOCK(lvp, 0, td);
+			vdrop(lvp);
+			return (vop_stdlock(ap));
+		}
 	}
-#endif
 
-	if ((un->un_flags & UN_CACHED) == 0)
-		vgone(vp);
+	if (error == 0 && uvp != NULLVP) {
+		VI_LOCK_FLAGS(uvp, MTX_DUPOK);
+		flags |= LK_INTERLOCK;
+		vholdl(uvp);
+		uhold = 1;
 
-	return (0);
+		VI_UNLOCK(vp);
+		ap->a_flags &= ~LK_INTERLOCK;
+
+		error = VOP_LOCK(uvp, flags, td);
+
+		VI_LOCK(vp);
+		unp = VTOUNIONFS(vp);
+		if (unp == NULL) {
+			VI_UNLOCK(vp);
+			if (error == 0) {
+				VOP_UNLOCK(uvp, 0, td);
+				if (lvp != NULLVP)
+					VOP_UNLOCK(lvp, 0, td);
+			}
+			if (lvp != NULLVP)
+				vdrop(lvp);
+			vdrop(uvp);
+			return (vop_stdlock(ap));
+		}
+
+		if (error != 0 && lvp != NULLVP) {
+			VI_UNLOCK(vp);
+			if ((revlock & LK_TYPE_MASK) == LK_RELEASE)
+				VOP_UNLOCK(lvp, revlock, td);
+			else
+				vn_lock(lvp, revlock | LK_RETRY, td);
+			goto unionfs_lock_abort;
+		}
+	}
+
+	VI_UNLOCK(vp);
+unionfs_lock_abort:
+	if (lvp != NULLVP)
+		vdrop(lvp);
+	if (uhold != 0)
+		vdrop(uvp);
+
+	return (error);
+
+unionfs_lock_null_vnode:
+	ap->a_flags |= LK_INTERLOCK;
+	return (vop_stdlock(ap));
 }
 
 static int
-union_reclaim(ap)
-	struct vop_reclaim_args /* {
-		struct vnode *a_vp;
-	} */ *ap;
+unionfs_unlock(struct vop_unlock_args *ap)
 {
-	union_freevp(ap->a_vp);
+	int		error;
+	int		flags;
+	int		mtxlkflag;
+	int		uhold;
+	struct vnode   *vp;
+	struct vnode   *lvp;
+	struct vnode   *uvp;
+	struct unionfs_node *unp;
 
-	return (0);
+	error = 0;
+	mtxlkflag = 0;
+	uhold = 0;
+	flags = ap->a_flags | LK_RELEASE;
+	vp = ap->a_vp;
+
+	if ((flags & LK_INTERLOCK) != 0)
+		mtxlkflag = 1;
+	else if (mtx_owned(VI_MTX(vp)) == 0) {
+		VI_LOCK(vp);
+		mtxlkflag = 2;
+	}
+
+	unp = VTOUNIONFS(vp);
+	if (unp == NULL)
+		goto unionfs_unlock_null_vnode;
+	lvp = unp->un_lowervp;
+	uvp = unp->un_uppervp;
+
+	if (lvp != NULLVP) {
+		VI_LOCK_FLAGS(lvp, MTX_DUPOK);
+		flags |= LK_INTERLOCK;
+		vholdl(lvp);
+
+		VI_UNLOCK(vp);
+		ap->a_flags &= ~LK_INTERLOCK;
+
+		error = VOP_UNLOCK(lvp, flags, ap->a_td);
+
+		VI_LOCK(vp);
+	}
+
+	if (error == 0 && uvp != NULLVP) {
+		VI_LOCK_FLAGS(uvp, MTX_DUPOK);
+		flags |= LK_INTERLOCK;
+		vholdl(uvp);
+		uhold = 1;
+
+		VI_UNLOCK(vp);
+		ap->a_flags &= ~LK_INTERLOCK;
+
+		error = VOP_UNLOCK(uvp, flags, ap->a_td);
+
+		VI_LOCK(vp);
+	}
+
+	VI_UNLOCK(vp);
+	if (lvp != NULLVP)
+		vdrop(lvp);
+	if (uhold != 0)
+		vdrop(uvp);
+	if (mtxlkflag == 0)
+		VI_LOCK(vp);
+
+	return error;
+
+unionfs_unlock_null_vnode:
+	if (mtxlkflag == 2)
+		VI_UNLOCK(vp);
+	return (vop_stdunlock(ap));
 }
 
 static int
-union_print(ap)
-	struct vop_print_args /* {
-		struct vnode *a_vp;
-	} */ *ap;
-{
-	struct vnode *vp = ap->a_vp;
-
-	printf("\tvp=%p, uppervp=%p, lowervp=%p\n",
-	       vp, UPPERVP(vp), LOWERVP(vp));
-	if (UPPERVP(vp) != NULLVP)
-		vprint("union: upper", UPPERVP(vp));
-	if (LOWERVP(vp) != NULLVP)
-		vprint("union: lower", LOWERVP(vp));
+unionfs_pathconf(struct vop_pathconf_args *ap)
+{
+	struct unionfs_node *unp;
+	struct vnode   *vp;
 
-	return (0);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
+
+	return (VOP_PATHCONF(vp, ap->a_name, ap->a_retval));
 }
 
 static int
-union_pathconf(ap)
-	struct vop_pathconf_args /* {
-		struct vnode *a_vp;
-		int a_name;
-		int *a_retval;
-	} */ *ap;
+unionfs_advlock(struct vop_advlock_args *ap)
 {
 	int error;
-	struct thread *td = curthread;		/* XXX */
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp;
+	struct vnode   *vp;
+	struct vnode   *uvp;
+	struct thread  *td;
 
-	vp = union_lock_other(un, td);
-	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
+	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: enter\n");
 
-	ap->a_vp = vp;
-	error = VOP_PATHCONF_AP(ap);
-	union_unlock_other(vp, td);
+	vp = ap->a_vp;
+	td = curthread;
 
-	return (error);
-}
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 
-static int
-union_advlock(ap)
-	struct vop_advlock_args /* {
-		struct vnode *a_vp;
-		caddr_t  a_id;
-		int  a_op;
-		struct flock *a_fl;
-		int  a_flags;
-	} */ *ap;
-{
-	register struct vnode *ovp = OTHERVP(ap->a_vp);
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
 
-	ap->a_vp = ovp;
-	return (VOP_ADVLOCK_AP(ap));
-}
+	if (uvp == NULLVP) {
+		error = unionfs_copyfile(unp, 1, td->td_ucred, td);
+		if (error != 0)
+			goto unionfs_advlock_abort;
+		uvp = unp->un_uppervp;
 
+		unionfs_get_node_status(unp, td, &unsp);
+		if (unsp->uns_lower_opencnt > 0) {
+			/* try reopen the vnode */
+			error = VOP_OPEN(uvp, unsp->uns_lower_openmode,
+				td->td_ucred, td, NULL);
+			if (error)
+				goto unionfs_advlock_abort;
+			unsp->uns_upper_opencnt++;
+			VOP_CLOSE(unp->un_lowervp, unsp->uns_lower_openmode, td->td_ucred, td);
+			unsp->uns_lower_opencnt--;
+		} else
+			unionfs_tryrem_node_status(unp, td, unsp);
+	}
+
+	VOP_UNLOCK(vp, 0, td);
+
+	error = VOP_ADVLOCK(uvp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error);
+
+	return error;
+
+unionfs_advlock_abort:
+	VOP_UNLOCK(vp, 0, td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error);
+
+	return error;
+}
 
-/*
- * XXX - vop_strategy must be hand coded because it has no
- * YYY - and it is not coherent with anything
- *
- * vnode in its arguments.
- * This goes away with a merged VM/buffer cache.
- */
 static int
-union_strategy(ap)
-	struct vop_strategy_args /* {
-		struct vnode *a_vp;
-		struct buf *a_bp;
-	} */ *ap;
+unionfs_strategy(struct vop_strategy_args *ap)
 {
-	struct buf *bp = ap->a_bp;
-	struct vnode *othervp = OTHERVP(ap->a_vp);
+	struct unionfs_node *unp;
+	struct vnode   *vp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 #ifdef DIAGNOSTIC
-	if (othervp == NULLVP)
-		panic("union_strategy: nil vp");
-	if ((bp->b_iocmd == BIO_WRITE) &&
-	    (othervp == LOWERVP(ap->a_vp)))
-		panic("union_strategy: writing to lowervp");
+	if (vp == NULLVP)
+		panic("unionfs_strategy: nullvp");
+
+	if (ap->a_bp->b_iocmd == BIO_WRITE && vp == unp->un_lowervp)
+		panic("unionfs_strategy: writing to lowervp");
 #endif
-	return (VOP_STRATEGY(othervp, bp));
+
+	return (VOP_STRATEGY(vp, ap->a_bp));
 }
 
 static int
-union_getacl(ap)
-	struct vop_getacl_args /* {
-		struct vnode *a_vp;
-		acl_type_t a_type;
-		struct acl *a_aclp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_getacl(struct vop_getacl_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_getacl: enter\n");
+
+	error = VOP_GETACL(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td);
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_GETACL_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	UNIONFS_INTERNAL_DEBUG("unionfs_getacl: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
-union_setacl(ap)
-	struct vop_setacl_args /* {
-		struct vnode *a_vp;
-		acl_type_t a_type;
-		struct acl *a_aclp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_setacl(struct vop_setacl_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct thread  *td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setacl: enter\n");
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = ap->a_td;
+
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	if (uvp == NULLVP && lvp->v_type == VREG) {
+		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+			return (error);
+		uvp = unp->un_uppervp;
+	}
+
+	if (uvp != NULLVP)
+		error = VOP_SETACL(uvp, ap->a_type, ap->a_aclp, ap->a_cred, td);
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_SETACL_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	UNIONFS_INTERNAL_DEBUG("unionfs_setacl: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
-union_aclcheck(ap)
-	struct vop_aclcheck_args /* {
-		struct vnode *a_vp;
-		acl_type_t a_type;
-		struct acl *a_aclp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_aclcheck(struct vop_aclcheck_args *ap)
 {
-	struct vnode *ovp = OTHERVP(ap->a_vp);
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: enter\n");
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
-	ap->a_vp = ovp;
-	return (VOP_ACLCHECK_AP(ap));
+	error = VOP_ACLCHECK(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: leave (%d)\n", error);
+
+	return (error);
 }
 
 static int
-union_closeextattr(ap)
-	struct vop_closeextattr_args /* {
-		struct vnode *a_vp;
-		int a_commit;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_openextattr(struct vop_openextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
+	struct vnode   *tvp;
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_CLOSEEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	vp = ap->a_vp;
+	unp = VTOUNIONFS(vp);
+	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
+
+	if ((tvp == unp->un_uppervp && (unp->un_flag & UNIONFS_OPENEXTU)) ||
+	    (tvp == unp->un_lowervp && (unp->un_flag & UNIONFS_OPENEXTL)))
+		return (EBUSY);
+
+	error = VOP_OPENEXTATTR(tvp, ap->a_cred, ap->a_td);
+
+	if (error == 0) {
+		vn_lock(vp, LK_UPGRADE | LK_RETRY, ap->a_td);
+		if (tvp == unp->un_uppervp)
+			unp->un_flag |= UNIONFS_OPENEXTU;
+		else
+			unp->un_flag |= UNIONFS_OPENEXTL;
+		vn_lock(vp, LK_DOWNGRADE | LK_RETRY, ap->a_td);
+	}
 
 	return (error);
 }
 
 static int
-union_getextattr(ap)
-	struct vop_getextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		const char *a_name;
-		struct uio *a_uio;
-		size_t *a_size;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_closeextattr(struct vop_closeextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
+	struct vnode   *tvp;
+
+	vp = ap->a_vp;
+	unp = VTOUNIONFS(vp);
+	tvp = NULLVP;
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_GETEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		tvp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		tvp = unp->un_lowervp;
+
+	if (tvp == NULLVP)
+		return (EOPNOTSUPP);
+
+	error = VOP_CLOSEEXTATTR(tvp, ap->a_commit, ap->a_cred, ap->a_td);
+
+	if (error == 0) {
+		vn_lock(vp, LK_UPGRADE | LK_RETRY, ap->a_td);
+		if (tvp == unp->un_uppervp)
+			unp->un_flag &= ~UNIONFS_OPENEXTU;
+		else
+			unp->un_flag &= ~UNIONFS_OPENEXTL;
+		vn_lock(vp, LK_DOWNGRADE | LK_RETRY, ap->a_td);
+	}
 
 	return (error);
 }
 
 static int
-union_listextattr(ap)
-	struct vop_listextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		struct uio *a_uio;
-		size_t *a_size;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_getextattr(struct vop_getextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_LISTEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = NULLVP;
 
-	return (error);
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		vp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		vp = unp->un_lowervp;
+
+	if (vp == NULLVP)
+		return (EOPNOTSUPP);
+
+	return (VOP_GETEXTATTR(vp, ap->a_attrnamespace, ap->a_name,
+	    ap->a_uio, ap->a_size, ap->a_cred, ap->a_td));
 }
 
 static int
-union_openextattr(ap)
-	struct vop_openextattr_args /* {
-		struct vnode *a_vp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_setextattr(struct vop_setextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct vnode   *ovp;
+	struct ucred   *cred;
+	struct thread  *td;
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	ovp = NULLVP;
+	cred = ap->a_cred;
+	td = ap->a_td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: enter (un_flag=%x)\n", unp->un_flag);
+
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_OPENEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		ovp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		ovp = unp->un_lowervp;
+
+	if (ovp == NULLVP)
+		return (EOPNOTSUPP);
+
+	if (ovp == lvp && lvp->v_type == VREG) {
+		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
+		if (uvp == NULLVP &&
+		    (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+unionfs_setextattr_reopen:
+			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+			    VOP_OPENEXTATTR(lvp, cred, td)) {
+#ifdef DIAGNOSTIC
+				panic("unionfs: VOP_OPENEXTATTR failed");
+#endif
+				unp->un_flag &= ~UNIONFS_OPENEXTL;
+			}
+			goto unionfs_setextattr_abort;
+		}
+		uvp = unp->un_uppervp;
+		if ((error = VOP_OPENEXTATTR(uvp, cred, td)) != 0)
+			goto unionfs_setextattr_reopen;
+		unp->un_flag &= ~UNIONFS_OPENEXTL;
+		unp->un_flag |= UNIONFS_OPENEXTU;
+		ovp = uvp;
+	}
+
+	if (ovp == uvp)
+		error = VOP_SETEXTATTR(ovp, ap->a_attrnamespace, ap->a_name,
+		    ap->a_uio, cred, td);
+
+unionfs_setextattr_abort:
+	UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
-union_deleteextattr(ap)
-	struct vop_deleteextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		const char *a_name;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_listextattr(struct vop_listextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_DELETEEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = NULLVP;
 
-	return (error);
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		vp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		vp = unp->un_lowervp;
+
+	if (vp == NULLVP)
+		return (EOPNOTSUPP);
+
+	return (VOP_LISTEXTATTR(vp, ap->a_attrnamespace, ap->a_uio,
+	    ap->a_size, ap->a_cred, ap->a_td));
 }
 
 static int
-union_setextattr(ap)
-	struct vop_setextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		const char *a_name;
-		struct uio *a_uio;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_deleteextattr(struct vop_deleteextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct vnode   *ovp;
+	struct ucred   *cred;
+	struct thread  *td;
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	ovp = NULLVP;
+	cred = ap->a_cred;
+	td = ap->a_td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: enter (un_flag=%x)\n", unp->un_flag);
+
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		ovp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		ovp = unp->un_lowervp;
+
+	if (ovp == NULLVP)
+		return (EOPNOTSUPP);
+
+	if (ovp == lvp && lvp->v_type == VREG) {
+		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
+		if (uvp == NULLVP &&
+		    (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+unionfs_deleteextattr_reopen:
+			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+			    VOP_OPENEXTATTR(lvp, cred, td)) {
+#ifdef DIAGNOSTIC
+				panic("unionfs: VOP_OPENEXTATTR failed");
+#endif
+				unp->un_flag &= ~UNIONFS_OPENEXTL;
+			}
+			goto unionfs_deleteextattr_abort;
+		}
+		uvp = unp->un_uppervp;
+		if ((error = VOP_OPENEXTATTR(uvp, cred, td)) != 0)
+			goto unionfs_deleteextattr_reopen;
+		unp->un_flag &= ~UNIONFS_OPENEXTL;
+		unp->un_flag |= UNIONFS_OPENEXTU;
+		ovp = uvp;
+	}
+
+	if (ovp == uvp)
+		error = VOP_DELETEEXTATTR(ovp, ap->a_attrnamespace, ap->a_name,
+		    ap->a_cred, ap->a_td);
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_SETEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+unionfs_deleteextattr_abort:
+	UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
-union_setlabel(ap)
-	struct vop_setlabel_args /* {
-		struct vnode *a_vp;
-		struct label *a_label;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_setlabel(struct vop_setlabel_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int		error;
+	struct unionfs_node *unp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct thread  *td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: enter\n");
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = ap->a_td;
+
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	if (uvp == NULLVP && lvp->v_type == VREG) {
+		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+			return (error);
+		uvp = unp->un_uppervp;
+	}
+
+	if (uvp != NULLVP)
+		error = VOP_SETLABEL(uvp, ap->a_label, ap->a_cred, td);
 
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_SETLABEL_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: leave (%d)\n", error);
 
 	return (error);
 }
 
-/*
- * Global vfs data structures
- */
-struct vop_vector union_vnodeops = {
+static int	/* handler installed as .vop_vptofh in unionfs_vnodeops */
+unionfs_vptofh(struct vop_vptofh_args *ap)
+{
+	return (EOPNOTSUPP);	/* unconditionally unsupported; ap is never read */
+}
+
+struct vop_vector unionfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
-	.vop_access =		union_access,
-	.vop_aclcheck =		union_aclcheck,
-	.vop_advlock =		union_advlock,
+	.vop_access =		unionfs_access,
+	.vop_aclcheck =		unionfs_aclcheck,
+	.vop_advlock =		unionfs_advlock,
 	.vop_bmap =		VOP_EOPNOTSUPP,
-	.vop_close =		union_close,
-	.vop_closeextattr =	union_closeextattr,
-	.vop_create =		union_create,
-	.vop_deleteextattr =	union_deleteextattr,
-	.vop_fsync =		union_fsync,
-	.vop_getacl =		union_getacl,
-	.vop_getattr =		union_getattr,
-	.vop_getextattr =	union_getextattr,
-	.vop_getwritemount =	union_getwritemount,
-	.vop_inactive =		union_inactive,
-	.vop_ioctl =		union_ioctl,
-	.vop_lease =		union_lease,
-	.vop_link =		union_link,
-	.vop_listextattr =	union_listextattr,
-	.vop_lookup =		union_lookup,
-	.vop_mkdir =		union_mkdir,
-	.vop_mknod =		union_mknod,
-	.vop_open =		union_open,
-	.vop_openextattr =	union_openextattr,
-	.vop_pathconf =		union_pathconf,
-	.vop_poll =		union_poll,
-	.vop_print =		union_print,
-	.vop_read =		union_read,
-	.vop_readdir =		union_readdir,
-	.vop_readlink =		union_readlink,
-	.vop_reclaim =		union_reclaim,
-	.vop_remove =		union_remove,
-	.vop_rename =		union_rename,
-	.vop_rmdir =		union_rmdir,
-	.vop_setacl =		union_setacl,
-	.vop_setattr =		union_setattr,
-	.vop_setextattr =	union_setextattr,
-	.vop_setlabel =		union_setlabel,
-	.vop_strategy =		union_strategy,
-	.vop_symlink =		union_symlink,
-	.vop_whiteout =		union_whiteout,
-	.vop_write =		union_write,
+	.vop_cachedlookup =	unionfs_lookup,
+	.vop_close =		unionfs_close,
+	.vop_closeextattr =	unionfs_closeextattr,
+	.vop_create =		unionfs_create,
+	.vop_deleteextattr =	unionfs_deleteextattr,
+	.vop_fsync =		unionfs_fsync,
+	.vop_getacl =		unionfs_getacl,
+	.vop_getattr =		unionfs_getattr,
+	.vop_getextattr =	unionfs_getextattr,
+	.vop_getwritemount =	unionfs_getwritemount,
+	.vop_inactive =		unionfs_inactive,
+	.vop_ioctl =		unionfs_ioctl,
+	.vop_lease =		unionfs_lease,
+	.vop_link =		unionfs_link,
+	.vop_listextattr =	unionfs_listextattr,
+	.vop_lock1 =		unionfs_lock,
+	.vop_lookup =		vfs_cache_lookup,
+	.vop_mkdir =		unionfs_mkdir,
+	.vop_mknod =		unionfs_mknod,
+	.vop_open =		unionfs_open,
+	.vop_openextattr =	unionfs_openextattr,
+	.vop_pathconf =		unionfs_pathconf,
+	.vop_poll =		unionfs_poll,
+	.vop_print =		unionfs_print,
+	.vop_read =		unionfs_read,
+	.vop_readdir =		unionfs_readdir,
+	.vop_readlink =		unionfs_readlink,
+	.vop_reclaim =		unionfs_reclaim,
+	.vop_remove =		unionfs_remove,
+	.vop_rename =		unionfs_rename,
+	.vop_rmdir =		unionfs_rmdir,
+	.vop_setacl =		unionfs_setacl,
+	.vop_setattr =		unionfs_setattr,
+	.vop_setextattr =	unionfs_setextattr,
+	.vop_setlabel =		unionfs_setlabel,
+	.vop_strategy =		unionfs_strategy,
+	.vop_symlink =		unionfs_symlink,
+	.vop_unlock =		unionfs_unlock,
+	.vop_whiteout =		unionfs_whiteout,
+	.vop_write =		unionfs_write,
+	.vop_vptofh =		unionfs_vptofh,
 };
Index: union_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/unionfs/union_vfsops.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/unionfs/union_vfsops.c -L sys/fs/unionfs/union_vfsops.c -u -r1.1.1.1 -r1.2
--- sys/fs/unionfs/union_vfsops.c
+++ sys/fs/unionfs/union_vfsops.c
@@ -1,6 +1,8 @@
 /*-
  * Copyright (c) 1994, 1995 The Regents of the University of California.
  * Copyright (c) 1994, 1995 Jan-Simon Pendry.
+ * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
+ * Copyright (c) 2006 Daichi Goto <daichi at freebsd.org>
  * All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
@@ -31,433 +33,464 @@
  * SUCH DAMAGE.
  *
  *	@(#)union_vfsops.c	8.20 (Berkeley) 5/20/95
- * $FreeBSD: src/sys/fs/unionfs/union_vfsops.c,v 1.76 2005/04/27 09:07:13 jeff Exp $
- */
-
-/*
- * Union Layer
+ * $FreeBSD: src/sys/fs/unionfs/union_vfsops.c,v 1.82.2.4 2007/10/22 05:41:54 daichi Exp $
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/vnode.h>
+#include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
-#include <sys/malloc.h>
-#include <sys/filedesc.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+
 #include <fs/unionfs/union.h>
 
-static MALLOC_DEFINE(M_UNIONFSMNT, "UNION mount", "UNION mount structure");
+static MALLOC_DEFINE(M_UNIONFSMNT, "UNIONFS mount", "UNIONFS mount structure");
 
-extern vfs_init_t       union_init;
-static vfs_root_t       union_root;
-static vfs_mount_t	union_mount;
-static vfs_statfs_t	union_statfs;
-static vfs_unmount_t    union_unmount;
+static vfs_fhtovp_t	unionfs_fhtovp;
+static vfs_checkexp_t	unionfs_checkexp;
+static vfs_mount_t	unionfs_domount;
+static vfs_quotactl_t	unionfs_quotactl;
+static vfs_root_t	unionfs_root;
+static vfs_sync_t	unionfs_sync;
+static vfs_statfs_t	unionfs_statfs;
+static vfs_unmount_t	unionfs_unmount;
+static vfs_vget_t	unionfs_vget;
+static vfs_extattrctl_t	unionfs_extattrctl;
+
+static struct vfsops unionfs_vfsops;
+
+/*
+ * Convert a userland file mode (S_I* permission bits) to a vmode (V* bits).
+ */
+static u_short 
+mode2vmode(mode_t mode)
+{
+	u_short		ret;
+
+	ret = 0;	/* bits of mode outside the nine tested below are dropped */
+
+	/* other: each V* bit shifted right by 6 */
+	if (mode & S_IXOTH)
+		ret |= VEXEC >> 6;
+	if (mode & S_IWOTH)
+		ret |= VWRITE >> 6;
+	if (mode & S_IROTH)
+		ret |= VREAD >> 6;
+
+	/* group: each V* bit shifted right by 3 */
+	if (mode & S_IXGRP)
+		ret |= VEXEC >> 3;
+	if (mode & S_IWGRP)
+		ret |= VWRITE >> 3;
+	if (mode & S_IRGRP)
+		ret |= VREAD >> 3;
+
+	/* owner: V* bits used unshifted */
+	if (mode & S_IXUSR)
+		ret |= VEXEC;
+	if (mode & S_IWUSR)
+		ret |= VWRITE;
+	if (mode & S_IRUSR)
+		ret |= VREAD;
+
+	return (ret);
+}
 
 /*
- * Mount union filesystem.
+ * Mount unionfs layer.
  */
 static int
-union_mount(mp, td)
-	struct mount *mp;
-	struct thread *td;
-{
-	int error = 0;
-	struct vfsoptlist *opts;
-	struct vnode *lowerrootvp = NULLVP;
-	struct vnode *upperrootvp = NULLVP;
-	struct union_mount *um = 0;
-	struct vattr va;
-	char *cp = 0, *target;
-	int op;
-	int len;
-	size_t size;
+unionfs_domount(struct mount *mp, struct thread *td)
+{
+	int		error;
+	struct vnode   *lowerrootvp;
+	struct vnode   *upperrootvp;
+	struct unionfs_mount *ump;
+	char           *target;
+	char           *tmp;
+	char           *ep;
+	int		len;
+	size_t		done;
+	int		below;
+	uid_t		uid;
+	gid_t		gid;
+	u_short		udir;
+	u_short		ufile;
+	unionfs_copymode copymode;
+	unionfs_whitemode whitemode;
 	struct componentname fakecn;
-	struct nameidata nd, *ndp = &nd;
+	struct nameidata nd, *ndp;
+	struct vattr	va;
 
-	UDEBUG(("union_mount(mp = %p)\n", (void *)mp));
+	UNIONFSDEBUG("unionfs_mount(mp = %p)\n", (void *)mp);
 
-	opts = mp->mnt_optnew;
-	/*
-	 * Disable clustered write, otherwise system becomes unstable.
-	 */
-	mp->mnt_flag |= MNT_NOCLUSTERW;
+	error = 0;
+	below = 0;
+	uid = 0;
+	gid = 0;
+	udir = 0;
+	ufile = 0;
+	copymode = UNIONFS_TRANSPARENT;	/* default */
+	whitemode = UNIONFS_WHITE_ALWAYS;
+	ndp = &nd;
 
-	if (mp->mnt_flag & MNT_ROOTFS)
+	if (mp->mnt_flag & MNT_ROOTFS) {
+		vfs_mount_error(mp, "Cannot union mount root filesystem");
 		return (EOPNOTSUPP);
+	}
+
 	/*
-	 * Update is a no-op
+	 * Mount update is not supported.
 	 */
-	if (mp->mnt_flag & MNT_UPDATE)
-		/*
-		 * Need to provide:
-		 * 1. a way to convert between rdonly and rdwr mounts.
-		 * 2. support for nfs exports.
-		 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		vfs_mount_error(mp, "unionfs does not support mount update");
 		return (EOPNOTSUPP);
+	}
 
 	/*
-	 * Get arguments.
+	 * Get argument
 	 */
-	error = vfs_getopt(opts, "target", (void **)&target, &len);
-	if (error || target[len - 1] != '\0')
+	error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
+	if (error)
+		error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target,
+		    &len);
+	if (error || target[len - 1] != '\0') {
+		vfs_mount_error(mp, "Invalid target");
 		return (EINVAL);
-
-	op = 0;
-	if (vfs_getopt(opts, "below", NULL, NULL) == 0)
-		op = UNMNT_BELOW;
-	if (vfs_getopt(opts, "replace", NULL, NULL) == 0) {
-		/* These options are mutually exclusive. */
-		if (op)
+	}
+	if (vfs_getopt(mp->mnt_optnew, "below", NULL, NULL) == 0)
+		below = 1;
+	if (vfs_getopt(mp->mnt_optnew, "udir", (void **)&tmp, NULL) == 0) {
+		if (tmp != NULL)
+			udir = (mode_t)strtol(tmp, &ep, 8);
+		if (tmp == NULL || *ep) {
+			vfs_mount_error(mp, "Invalid udir");
 			return (EINVAL);
-		op = UNMNT_REPLACE;
+		}
+		udir = mode2vmode(udir);
 	}
-	/*
-	 * UNMNT_ABOVE is the default.
-	 */
-	if (op == 0)
-		op = UNMNT_ABOVE;
-
-	/*
-	 * Obtain lower vnode.  Vnode is stored in mp->mnt_vnodecovered.
-	 * We need to reference it but not lock it.
-	 */
-	lowerrootvp = mp->mnt_vnodecovered;
-	VREF(lowerrootvp);
-	/*
-	 * Obtain upper vnode by calling namei() on the path.  The
-	 * upperrootvp will be turned referenced and locked.
-	 */
-	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, td);
-	error = namei(ndp);
+	if (vfs_getopt(mp->mnt_optnew, "ufile", (void **)&tmp, NULL) == 0) {
+		if (tmp != NULL)
+			ufile = (mode_t)strtol(tmp, &ep, 8);
+		if (tmp == NULL || *ep) {
+			vfs_mount_error(mp, "Invalid ufile");
+			return (EINVAL);
+		}
+		ufile = mode2vmode(ufile);
+	}
+	/* check umask, uid and gid */
+	if (udir == 0 && ufile != 0)
+		udir = ufile;
+	if (ufile == 0 && udir != 0)
+		ufile = udir;
+
+	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY, td);
+	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred, td);
+	if (!error) {
+		if (udir == 0)
+			udir = va.va_mode;
+		if (ufile == 0)
+			ufile = va.va_mode;
+		uid = va.va_uid;
+		gid = va.va_gid;
+	}
+	VOP_UNLOCK(mp->mnt_vnodecovered, 0, td);
 	if (error)
-		goto bad;
-	NDFREE(ndp, NDF_ONLY_PNBUF);
-	upperrootvp = ndp->ni_vp;
-
-	UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp,
-	    VOP_ISLOCKED(upperrootvp, NULL)));
+		return (error);
 
-	/*
-	 * Check multi union mount to avoid `lock myself again' panic.
-	 * Also require that it be a directory.
-	 */
-	if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) {
-#ifdef DIAGNOSTIC
-		printf("union_mount: multi union mount?\n");
-#endif
-		error = EDEADLK;
-		goto bad;
+	if (mp->mnt_cred->cr_ruid == 0) {	/* root only */
+		if (vfs_getopt(mp->mnt_optnew, "uid", (void **)&tmp,
+		    NULL) == 0) {
+			if (tmp != NULL)
+				uid = (uid_t)strtol(tmp, &ep, 10);
+			if (tmp == NULL || *ep) {
+				vfs_mount_error(mp, "Invalid uid");
+				return (EINVAL);
+			}
+		}
+		if (vfs_getopt(mp->mnt_optnew, "gid", (void **)&tmp,
+		    NULL) == 0) {
+			if (tmp != NULL)
+				gid = (gid_t)strtol(tmp, &ep, 10);
+			if (tmp == NULL || *ep) {
+				vfs_mount_error(mp, "Invalid gid");
+				return (EINVAL);
+			}
+		}
+		if (vfs_getopt(mp->mnt_optnew, "copymode", (void **)&tmp,
+		    NULL) == 0) {
+			if (tmp == NULL) {
+				vfs_mount_error(mp, "Invalid copymode");
+				return (EINVAL);
+			} else if (strcasecmp(tmp, "traditional") == 0)
+				copymode = UNIONFS_TRADITIONAL;
+			else if (strcasecmp(tmp, "transparent") == 0)
+				copymode = UNIONFS_TRANSPARENT;
+			else if (strcasecmp(tmp, "masquerade") == 0)
+				copymode = UNIONFS_MASQUERADE;
+			else {
+				vfs_mount_error(mp, "Invalid copymode");
+				return (EINVAL);
+			}
+		}
+		if (vfs_getopt(mp->mnt_optnew, "whiteout", (void **)&tmp,
+		    NULL) == 0) {
+			if (tmp == NULL) {
+				vfs_mount_error(mp, "Invalid whiteout mode");
+				return (EINVAL);
+			} else if (strcasecmp(tmp, "always") == 0)
+				whitemode = UNIONFS_WHITE_ALWAYS;
+			else if (strcasecmp(tmp, "whenneeded") == 0)
+				whitemode = UNIONFS_WHITE_WHENNEEDED;
+			else {
+				vfs_mount_error(mp, "Invalid whiteout mode");
+				return (EINVAL);
+			}
+		}
 	}
-
-	if (upperrootvp->v_type != VDIR) {
-		error = EINVAL;
-		goto bad;
+	/* If copymode is UNIONFS_TRADITIONAL, use the mounting user's uid/gid. */
+	if (copymode == UNIONFS_TRADITIONAL) {
+		uid = mp->mnt_cred->cr_ruid;
+		gid = mp->mnt_cred->cr_rgid;
 	}
 
+	UNIONFSDEBUG("unionfs_mount: uid=%d, gid=%d\n", uid, gid);
+	UNIONFSDEBUG("unionfs_mount: udir=0%03o, ufile=0%03o\n", udir, ufile);
+	UNIONFSDEBUG("unionfs_mount: copymode=%d\n", copymode);
+
 	/*
-	 * Allocate our union_mount structure and populate the fields.
-	 * The vnode references are stored in the union_mount as held,
-	 * unlocked references.  Depending on the _BELOW flag, the
-	 * filesystems are viewed in a different order.  In effect this
-	 * is the same as providing a mount-under option to the mount
-	 * syscall.
+	 * Find upper node
 	 */
+	NDINIT(ndp, LOOKUP, FOLLOW | WANTPARENT | LOCKLEAF, UIO_SYSSPACE, target, td);
+	if ((error = namei(ndp)))
+		return (error);
 
-	um = (struct union_mount *) malloc(sizeof(struct union_mount),
-				M_UNIONFSMNT, M_WAITOK | M_ZERO);
-
-	um->um_op = op;
+	NDFREE(ndp, NDF_ONLY_PNBUF);
 
-	error = VOP_GETATTR(upperrootvp, &va, td->td_ucred, td);
-	if (error)
-		goto bad;
+	/* get root vnodes */
+	lowerrootvp = mp->mnt_vnodecovered;
+	upperrootvp = ndp->ni_vp;
 
-	um->um_upperdev = va.va_fsid;
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULLVP;
 
-	switch (um->um_op) {
-	case UNMNT_ABOVE:
-		um->um_lowervp = lowerrootvp;
-		um->um_uppervp = upperrootvp;
-		upperrootvp = NULL;
-		lowerrootvp = NULL;
-		break;
+	/* create unionfs_mount */
+	ump = (struct unionfs_mount *)malloc(sizeof(struct unionfs_mount),
+	    M_UNIONFSMNT, M_WAITOK | M_ZERO);
 
-	case UNMNT_BELOW:
+	/*
+	 * Save reference
+	 */
+	if (below) {
 		VOP_UNLOCK(upperrootvp, 0, td);
-		vn_lock(lowerrootvp, LK_RETRY|LK_EXCLUSIVE, td);
-		um->um_lowervp = upperrootvp;
-		um->um_uppervp = lowerrootvp;
-		upperrootvp = NULL;
-		lowerrootvp = NULL;
-		break;
-
-	case UNMNT_REPLACE:
-		vrele(lowerrootvp);
-		lowerrootvp = NULL;
-		um->um_uppervp = upperrootvp;
-		um->um_lowervp = lowerrootvp;
-		upperrootvp = NULL;
-		break;
-
-	default:
-		error = EINVAL;
-		goto bad;
+		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, td);
+		ump->um_lowervp = upperrootvp;
+		ump->um_uppervp = lowerrootvp;
+	} else {
+		ump->um_lowervp = lowerrootvp;
+		ump->um_uppervp = upperrootvp;
 	}
+	ump->um_rootvp = NULLVP;
+	ump->um_uid = uid;
+	ump->um_gid = gid;
+	ump->um_udir = udir;
+	ump->um_ufile = ufile;
+	ump->um_copymode = copymode;
+	ump->um_whitemode = whitemode;
+
+	MNT_ILOCK(mp);
+	if ((lowerrootvp->v_mount->mnt_kern_flag & MNTK_MPSAFE) &&
+	    (upperrootvp->v_mount->mnt_kern_flag & MNTK_MPSAFE))
+		mp->mnt_kern_flag |= MNTK_MPSAFE;
+	MNT_IUNLOCK(mp);
+	mp->mnt_data = (qaddr_t)ump;
 
 	/*
-	 * Unless the mount is readonly, ensure that the top layer
-	 * supports whiteout operations.
+	 * Copy upper layer's RDONLY flag.
+	 */
+	mp->mnt_flag |= ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY;
+
+	/*
+	 * Check whiteout
 	 */
 	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
-		/*
-		 * XXX Fake up a struct componentname with only cn_nameiop
-		 * and cn_thread valid; union_whiteout() needs to use the
-		 * thread pointer to lock the vnode.
-		 */
-		bzero(&fakecn, sizeof(fakecn));
+		memset(&fakecn, 0, sizeof(fakecn));
 		fakecn.cn_nameiop = LOOKUP;
 		fakecn.cn_thread = td;
-		error = VOP_WHITEOUT(um->um_uppervp, &fakecn, LOOKUP);
-		if (error)
-			goto bad;
+		error = VOP_WHITEOUT(ump->um_uppervp, &fakecn, LOOKUP);
+		if (error) {
+			if (below) {
+				VOP_UNLOCK(ump->um_uppervp, 0, td);
+				vrele(upperrootvp);
+			} else
+				vput(ump->um_uppervp);
+			free(ump, M_UNIONFSMNT);
+			mp->mnt_data = NULL;
+			return (error);
+		}
 	}
-	VOP_UNLOCK(um->um_uppervp, 0, td);
 
-	um->um_cred = crhold(td->td_ucred);
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
-	um->um_cmode = UN_DIRMODE &~ td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	/*
+	 * Unlock the node
+	 */
+	VOP_UNLOCK(ump->um_uppervp, 0, td);
 
 	/*
-	 * Depending on what you think the MNT_LOCAL flag might mean,
-	 * you may want the && to be || on the conditional below.
-	 * At the moment it has been defined that the filesystem is
-	 * only local if it is all local, ie the MNT_LOCAL flag implies
-	 * that the entire namespace is local.  If you think the MNT_LOCAL
-	 * flag implies that some of the files might be stored locally
-	 * then you will want to change the conditional.
+	 * Get the unionfs root vnode.
 	 */
-	if (um->um_op == UNMNT_ABOVE) {
-		if (((um->um_lowervp == NULLVP) ||
-		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
-		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
-			mp->mnt_flag |= MNT_LOCAL;
+	error = unionfs_nodeget(mp, ump->um_uppervp, ump->um_lowervp,
+	    NULLVP, &(ump->um_rootvp), NULL, td);
+	vrele(upperrootvp);
+	if (error) {
+		free(ump, M_UNIONFSMNT);
+		mp->mnt_data = NULL;
+		return (error);
 	}
 
 	/*
-	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
-	 * of lookup() which explicitly checks the flag, rather than asking
-	 * the filesystem for its own opinion.  This means, that an update
-	 * mount of the underlying filesystem to go from rdonly to rdwr
-	 * will leave the unioned view as read-only.
+	 * Check mnt_flag
 	 */
-	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+	if ((ump->um_lowervp->v_mount->mnt_flag & MNT_LOCAL) &&
+	    (ump->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+		mp->mnt_flag |= MNT_LOCAL;
 
-	mp->mnt_data = (qaddr_t) um;
+	/*
+	 * Get new fsid
+	 */
 	vfs_getnewfsid(mp);
 
-	switch (um->um_op) {
-	case UNMNT_ABOVE:
-		cp = "<above>:";
-		break;
-	case UNMNT_BELOW:
-		cp = "<below>:";
-		break;
-	case UNMNT_REPLACE:
-		cp = "";
-		break;
-	}
-	len = strlen(cp);
-	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
-
-	cp = mp->mnt_stat.f_mntfromname + len;
-	len = MNAMELEN - len;
+	len = MNAMELEN - 1;
+	tmp = mp->mnt_stat.f_mntfromname;
+	copystr((below ? "<below>:" : "<above>:"), tmp, len, &done);
+	len -= done - 1;
+	tmp += done - 1;
+	copystr(target, tmp, len, NULL);
 
-	(void) copystr(target, cp, len - 1, &size);
-	bzero(cp + size, len - size);
+	UNIONFSDEBUG("unionfs_mount: from %s, on %s\n",
+	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
 
-	UDEBUG(("union_mount: from %s, on %s\n",
-		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname));
 	return (0);
-
-bad:
-	if (um) {
-		if (um->um_uppervp)
-			vput(um->um_uppervp);
-		if (um->um_lowervp)
-			vrele(um->um_lowervp);
-		/* XXX other fields */
-		free(um, M_UNIONFSMNT);
-	}
-	if (upperrootvp)
-		vput(upperrootvp);
-	if (lowerrootvp)
-		vrele(lowerrootvp);
-	return (error);
 }
 
 /*
- * Free reference to union layer.
+ * Free reference to unionfs layer
  */
 static int
-union_unmount(mp, mntflags, td)
-	struct mount *mp;
-	int mntflags;
-	struct thread *td;
-{
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	int error;
-	int freeing;
-	int flags = 0;
+unionfs_unmount(struct mount *mp, int mntflags, struct thread *td)
+{
+	struct unionfs_mount *ump;
+	int		error;
+	int		num;
+	int		freeing;
+	int		flags;
+
+	UNIONFSDEBUG("unionfs_unmount: mp = %p\n", (void *)mp);
 
-	UDEBUG(("union_unmount(mp = %p)\n", (void *)mp));
+	ump = MOUNTTOUNIONFSMOUNT(mp);
+	flags = 0;
 
 	if (mntflags & MNT_FORCE)
 		flags |= FORCECLOSE;
 
-	/*
-	 * Keep flushing vnodes from the mount list.
-	 * This is needed because of the un_pvp held
-	 * reference to the parent vnode.
-	 * If more vnodes have been freed on a given pass,
-	 * the try again.  The loop will iterate at most
-	 * (d) times, where (d) is the maximum tree depth
-	 * in the filesystem.
-	 */
-	for (freeing = 0; (error = vflush(mp, 0, flags, td)) != 0;) {
-		int n;
-
-		/* count #vnodes held on mount list */
-		n = mp->mnt_nvnodelistsize;
-
-		/* if this is unchanged then stop */
-		if (n == freeing)
+	/* vflush (no need to call vrele) */
+	for (freeing = 0; (error = vflush(mp, 1, flags, td)) != 0;) {
+		num = mp->mnt_nvnodelistsize;
+		if (num == freeing)
 			break;
-
-		/* otherwise try once more time */
-		freeing = n;
+		freeing = num;
 	}
 
-	/*
-	 * If the most recent vflush failed, the filesystem is still busy.
-	 */
 	if (error)
 		return (error);
 
-	/*
-	 * Discard references to upper and lower target vnodes.
-	 */
-	if (um->um_lowervp)
-		vrele(um->um_lowervp);
-	vrele(um->um_uppervp);
-	crfree(um->um_cred);
-	/*
-	 * Finally, throw away the union_mount structure.
-	 */
-	free(mp->mnt_data, M_UNIONFSMNT);	/* XXX */
+	free(ump, M_UNIONFSMNT);
 	mp->mnt_data = 0;
+
+	return (0);
+}
+
+static int	/* vfs_root handler: hands back the mount's root vnode in *vpp */
+unionfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
+{
+	struct unionfs_mount *ump;
+	struct vnode   *vp;
+
+	ump = MOUNTTOUNIONFSMOUNT(mp);
+	vp = ump->um_rootvp;	/* root vnode stored in the unionfs_mount */
+
+	UNIONFSDEBUG("unionfs_root: rootvp=%p locked=%x\n",
+	    vp, VOP_ISLOCKED(vp, td));
+
+	vref(vp);	/* take a reference for the pointer returned via *vpp */
+	if (flags & LK_TYPE_MASK)	/* lock only when flags carry a lock type */
+		vn_lock(vp, flags, td);
+
+	*vpp = vp;
+
 	return (0);
 }
 
 static int
-union_root(mp, flags, vpp, td)
-	struct mount *mp;
-	int flags;
-	struct vnode **vpp;
-	struct thread *td;
-{
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	int error;
-
-	/*
-	 * Supply an unlocked reference to um_uppervp and to um_lowervp.  It
-	 * is possible for um_uppervp to be locked without the associated
-	 * root union_node being locked.  We let union_allocvp() deal with
-	 * it.
-	 */
-	UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp,
-	    VOP_ISLOCKED(um->um_uppervp, NULL)));
-
-	VREF(um->um_uppervp);
-	if (um->um_lowervp)
-		VREF(um->um_lowervp);
-
-	error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, 
-		    um->um_uppervp, um->um_lowervp, 1);
-	UDEBUG(("error %d\n", error));
-	UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp,
-	    VOP_ISLOCKED(um->um_uppervp, NULL)));
+unionfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg,
+    struct thread *td)
+{
+	struct unionfs_mount *ump;
+
+	ump = MOUNTTOUNIONFSMOUNT(mp);
 
-	return (error);
+	/*
+	 * Writing is always performed to upper vnode.
+	 */
+	return (VFS_QUOTACTL(ump->um_uppervp->v_mount, cmd, uid, arg, td));
 }
 
 static int
-union_statfs(mp, sbp, td)
-	struct mount *mp;
-	struct statfs *sbp;
-	struct thread *td;
-{
-	int error;
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	struct statfs mstat;
-	int lbsize;
+unionfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
+{
+	struct unionfs_mount *ump;
+	int		error;
+	struct statfs	mstat;
+	uint64_t	lbsize;
 
-	UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n",
-	    (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp));
+	ump = MOUNTTOUNIONFSMOUNT(mp);
+
+	UNIONFSDEBUG("unionfs_statfs(mp = %p, lvp = %p, uvp = %p)\n",
+	    (void *)mp, (void *)ump->um_lowervp, (void *)ump->um_uppervp);
 
 	bzero(&mstat, sizeof(mstat));
 
-	if (um->um_lowervp) {
-		error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, td);
-		if (error)
-			return (error);
-	}
+	error = VFS_STATFS(ump->um_lowervp->v_mount, &mstat, td);
+	if (error)
+		return (error);
 
-	/*
-	 * Now copy across the "interesting" information and fake the rest.
-	 */
-#if 0
-	sbp->f_type = mstat.f_type;
-	sbp->f_flags = mstat.f_flags;
-	sbp->f_bsize = mstat.f_bsize;
-	sbp->f_iosize = mstat.f_iosize;
-#endif
-	lbsize = mstat.f_bsize;
+	/* now copy across the "interesting" information and fake the rest */
 	sbp->f_blocks = mstat.f_blocks;
-	sbp->f_bfree = mstat.f_bfree;
-	sbp->f_bavail = mstat.f_bavail;
 	sbp->f_files = mstat.f_files;
-	sbp->f_ffree = mstat.f_ffree;
 
-	error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, td);
+	lbsize = mstat.f_bsize;
+
+	error = VFS_STATFS(ump->um_uppervp->v_mount, &mstat, td);
 	if (error)
 		return (error);
 
+	/*
+	 * The FS type etc. are copied from the upper vfs.
+	 * (the writable vfs has priority)
+	 */
+	sbp->f_type = mstat.f_type;
 	sbp->f_flags = mstat.f_flags;
 	sbp->f_bsize = mstat.f_bsize;
 	sbp->f_iosize = mstat.f_iosize;
 
-	/*
-	 * If the lower and upper blocksizes differ, then frig the
-	 * block counts so that the sizes reported by df make some
-	 * kind of sense.  None of this makes sense though.
-	 */
-
 	if (mstat.f_bsize != lbsize)
-		sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize;
+		sbp->f_blocks = ((off_t)sbp->f_blocks * lbsize) / mstat.f_bsize;
 
-	/*
-	 * The "total" fields count total resources in all layers,
-	 * the "free" fields count only those resources which are
-	 * free in the upper layer (since only the upper layer
-	 * is writeable).
-	 */
 	sbp->f_blocks += mstat.f_blocks;
 	sbp->f_bfree = mstat.f_bfree;
 	sbp->f_bavail = mstat.f_bavail;
@@ -466,12 +499,64 @@
 	return (0);
 }
 
-static struct vfsops union_vfsops = {
-	.vfs_init = 		union_init,
-	.vfs_mount =		union_mount,
-	.vfs_root =		union_root,
-	.vfs_statfs =		union_statfs,
-	.vfs_unmount =		union_unmount,
+static int	/* vfs_sync handler */
+unionfs_sync(struct mount *mp, int waitfor, struct thread *td)
+{
+	/* nothing to do here: no argument is read and success is always reported */
+	return (0);
+}
+
+static int	/* vfs_vget handler */
+unionfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
+{
+	return (EOPNOTSUPP);	/* unconditionally unsupported; *vpp is not written */
+}
+
+static int	/* vfs_fhtovp handler */
+unionfs_fhtovp(struct mount *mp, struct fid *fidp, struct vnode **vpp)
+{
+	return (EOPNOTSUPP);	/* unconditionally unsupported; *vpp is not written */
+}
+
+static int	/* vfs_checkexp handler */
+unionfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
+		 struct ucred **credanonp)
+{
+	return (EOPNOTSUPP);	/* unconditionally unsupported; outputs are not written */
+}
+
+static int	/* vfs_extattrctl handler: forwards the request to one underlying layer */
+unionfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
+    int namespace, const char *attrname, struct thread *td)
+{
+	struct unionfs_mount *ump;
+	struct unionfs_node *unp;
+
+	ump = MOUNTTOUNIONFSMOUNT(mp);
+	unp = VTOUNIONFS(filename_vp);	/* NOTE(review): filename_vp is not NULL-checked before unp is dereferenced; confirm callers */
+
+	if (unp->un_uppervp != NULLVP) {	/* node has an upper vnode: use the upper mount */
+		return (VFS_EXTATTRCTL(ump->um_uppervp->v_mount, cmd,
+		    unp->un_uppervp, namespace, attrname, td));
+	} else {	/* no upper vnode: fall back to the lower mount and lower vnode */
+		return (VFS_EXTATTRCTL(ump->um_lowervp->v_mount, cmd,
+		    unp->un_lowervp, namespace, attrname, td));
+	}
+}
+
+static struct vfsops unionfs_vfsops = {	/* operations table passed to VFS_SET below */
+	.vfs_checkexp =		unionfs_checkexp,	/* stub: EOPNOTSUPP */
+	.vfs_extattrctl =	unionfs_extattrctl,
+	.vfs_fhtovp =		unionfs_fhtovp,	/* stub: EOPNOTSUPP */
+	.vfs_init =		unionfs_init,
+	.vfs_mount =		unionfs_domount,
+	.vfs_quotactl =		unionfs_quotactl,
+	.vfs_root =		unionfs_root,
+	.vfs_statfs =		unionfs_statfs,
+	.vfs_sync =		unionfs_sync,	/* no-op: always returns 0 */
+	.vfs_uninit =		unionfs_uninit,
+	.vfs_unmount =		unionfs_unmount,
+	.vfs_vget =		unionfs_vget,	/* stub: EOPNOTSUPP */
+};
 
-VFS_SET(union_vfsops, unionfs, VFCF_LOOPBACK);
+VFS_SET(unionfs_vfsops, unionfs, VFCF_LOOPBACK);
Index: union_subr.c
===================================================================
RCS file: /home/cvs/src/sys/fs/unionfs/union_subr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/unionfs/union_subr.c -L sys/fs/unionfs/union_subr.c -u -r1.1.1.1 -r1.2
--- sys/fs/unionfs/union_subr.c
+++ sys/fs/unionfs/union_subr.c
@@ -2,6 +2,8 @@
  * Copyright (c) 1994 Jan-Simon Pendry
  * Copyright (c) 1994
  *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
+ * Copyright (c) 2006 Daichi Goto <daichi at freebsd.org>
  *
  * This code is derived from software contributed to Berkeley by
  * Jan-Simon Pendry.
@@ -31,787 +33,495 @@
  * SUCH DAMAGE.
  *
  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
- * $FreeBSD: src/sys/fs/unionfs/union_subr.c,v 1.86 2005/04/27 09:06:06 jeff Exp $
+ * $FreeBSD: src/sys/fs/unionfs/union_subr.c,v 1.92.2.2 2007/10/22 05:30:40 daichi Exp $
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/fcntl.h>
-#include <sys/file.h>
-#include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/malloc.h>
-#include <sys/module.h>
 #include <sys/mount.h>
-#include <sys/mutex.h>
 #include <sys/namei.h>
-#include <sys/stat.h>
+#include <sys/proc.h>
 #include <sys/vnode.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/filedesc.h>
+#include <sys/stat.h>
+#include <sys/resourcevar.h>
+
+#ifdef MAC
+#include <sys/mac.h>
+#endif
 
-#include <vm/vm.h>
-#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
-#include <vm/vm_object.h>	/* for vm cache coherency */
 #include <vm/uma.h>
 
 #include <fs/unionfs/union.h>
 
-#include <sys/proc.h>
-
-extern int	union_init(void);
+MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
+MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
 
-/* must be power of two, otherwise change UNION_HASH() */
-#define NHASH 32
-
-/* unsigned int ... */
-#define UNION_HASH(u, l) \
-	(((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1))
-
-static MALLOC_DEFINE(M_UNPATH, "unpath", "UNION path component");
-static MALLOC_DEFINE(M_UNDCACHE, "undcac", "UNION directory cache");
-
-static LIST_HEAD(unhead, union_node) unhead[NHASH];
-static int unvplock[NHASH];
-
-static void	union_dircache_r(struct vnode *vp, struct vnode ***vppp,
-				      int *cntp);
-static int	union_list_lock(int ix);
-static void	union_list_unlock(int ix);
-static int	union_relookup(struct union_mount *um, struct vnode *dvp,
-				    struct vnode **vpp,
-				    struct componentname *cnp,
-				    struct componentname *cn, char *path,
-				    int pathlen);
-static void	union_updatevp(struct union_node *un,
-				    struct vnode *uppervp,
-				    struct vnode *lowervp);
-static void union_newlower(struct union_node *, struct vnode *);
-static void union_newupper(struct union_node *, struct vnode *);
-static int union_copyfile(struct vnode *, struct vnode *,
-					struct ucred *, struct thread *);
-static int union_vn_create(struct vnode **, struct union_node *,
-				struct thread *);
-static int union_vn_close(struct vnode *, int, struct ucred *,
-				struct thread *);
-
-int
-union_init()
+/*
+ * Initialize
+ */
+int 
+unionfs_init(struct vfsconf *vfsp)
 {
-	int i;
-
-	for (i = 0; i < NHASH; i++)
-		LIST_INIT(&unhead[i]);
-	bzero((caddr_t)unvplock, sizeof(unvplock));
+	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
 	return (0);
 }
 
-static int
-union_list_lock(ix)
-	int ix;
+/*
+ * Uninitialize
+ */
+int 
+unionfs_uninit(struct vfsconf *vfsp)
 {
-	if (unvplock[ix] & UNVP_LOCKED) {
-		unvplock[ix] |= UNVP_WANT;
-		(void) tsleep( &unvplock[ix], PINOD, "unllck", 0);
-		return (1);
-	}
-	unvplock[ix] |= UNVP_LOCKED;
 	return (0);
 }
 
-static void
-union_list_unlock(ix)
-	int ix;
-{
-	unvplock[ix] &= ~UNVP_LOCKED;
-
-	if (unvplock[ix] & UNVP_WANT) {
-		unvplock[ix] &= ~UNVP_WANT;
-		wakeup( &unvplock[ix]);
-	}
-}
-
 /*
- *	union_updatevp:
- *
- *	The uppervp, if not NULL, must be referenced and not locked by us
- *	The lowervp, if not NULL, must be referenced.
- *
- *	If uppervp and lowervp match pointers already installed, then
- *	nothing happens. The passed vp's (when matching) are not adjusted.
- *
- *	This routine may only be called by union_newupper() and
- *	union_newlower().
+ * Make a new or get existing unionfs node.
+ * 
+ * uppervp and lowervp should be unlocked, because the new unionfs vnode
+ * shares its lock with uppervp (or lowervp): locking it locks them too.
+ * To prevent deadlock, do not hold more than one of these locks at once.
  */
+int
+unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
+		struct vnode *lowervp, struct vnode *dvp,
+		struct vnode **vpp, struct componentname *cnp,
+		struct thread *td)
+{
+	struct unionfs_mount *ump;
+	struct unionfs_node *unp;
+	struct vnode   *vp;
+	int		error;
+	int		lkflags;
+	char	       *path;
+
+	ump = MOUNTTOUNIONFSMOUNT(mp);
+	lkflags = (cnp ? cnp->cn_lkflags : 0);
+	path = (cnp ? cnp->cn_nameptr : NULL);
 
-static void
-union_updatevp(un, uppervp, lowervp)
-	struct union_node *un;
-	struct vnode *uppervp;
-	struct vnode *lowervp;
-{
-	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
-	int nhash = UNION_HASH(uppervp, lowervp);
-	int docache = (lowervp != NULLVP || uppervp != NULLVP);
-	int lhash, uhash;
+	if (uppervp == NULLVP && lowervp == NULLVP)
+		panic("unionfs_nodeget: upper and lower is null");
 
-	/*
-	 * Ensure locking is ordered from lower to higher
-	 * to avoid deadlocks.
-	 */
-	if (nhash < ohash) {
-		lhash = nhash;
-		uhash = ohash;
-	} else {
-		lhash = ohash;
-		uhash = nhash;
-	}
-
-	if (lhash != uhash) {
-		while (union_list_lock(lhash))
-			continue;
-	}
-
-	while (union_list_lock(uhash))
-		continue;
-
-	if (ohash != nhash || !docache) {
-		if (un->un_flags & UN_CACHED) {
-			un->un_flags &= ~UN_CACHED;
-			LIST_REMOVE(un, un_cache);
-		}
+	/* If it has no ISLASTCN flag, path check is skipped. */
+	if (cnp && !(cnp->cn_flags & ISLASTCN))
+		path = NULL;
+
+	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
+	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
+		if (dvp == NULLVP)
+			return (EINVAL);
 	}
 
-	if (ohash != nhash)
-		union_list_unlock(ohash);
-
-	if (un->un_lowervp != lowervp) {
-		if (un->un_lowervp) {
-			vrele(un->un_lowervp);
-			if (un->un_path) {
-				free(un->un_path, M_UNPATH);
-				un->un_path = 0;
-			}
-		}
-		un->un_lowervp = lowervp;
-		un->un_lowersz = VNOVAL;
-	}
+	/*
+	 * Do the MALLOC before the getnewvnode since doing so afterward
+	 * might cause a bogus v_data pointer to get dereferenced elsewhere
+	 * if MALLOC should block.
+	 */
+	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
+	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
 
-	if (un->un_uppervp != uppervp) {
-		if (un->un_uppervp)
-			vrele(un->un_uppervp);
-		un->un_uppervp = uppervp;
-		un->un_uppersz = VNOVAL;
+	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
+	if (error != 0) {
+		FREE(unp, M_UNIONFSNODE);
+		return (error);
 	}
-
-	if (docache && (ohash != nhash)) {
-		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
-		un->un_flags |= UN_CACHED;
+	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
+	if (error != 0) {
+		FREE(unp, M_UNIONFSNODE);
+		return (error);
 	}
+	if (dvp != NULLVP)
+		vref(dvp);
+	if (uppervp != NULLVP)
+		vref(uppervp);
+	if (lowervp != NULLVP)
+		vref(lowervp);
+
+	unp->un_vnode = vp;
+	unp->un_uppervp = uppervp;
+	unp->un_lowervp = lowervp;
+	unp->un_dvp = dvp;
+	if (uppervp != NULLVP)
+		vp->v_vnlock = uppervp->v_vnlock;
+	else
+		vp->v_vnlock = lowervp->v_vnlock;
 
-	union_list_unlock(nhash);
-}
-
-/*
- * Set a new lowervp.  The passed lowervp must be referenced and will be
- * stored in the vp in a referenced state. 
- */
+	if (path != NULL) {
+		unp->un_path = (char *)
+		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
+		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
+		unp->un_path[cnp->cn_namelen] = '\0';
+	}
+	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
+	vp->v_data = unp;
+
+	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
+	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
+		vp->v_vflag |= VV_ROOT;
 
-static void
-union_newlower(un, lowervp)
-	struct union_node *un;
-	struct vnode *lowervp;
-{
-	union_updatevp(un, un->un_uppervp, lowervp);
-}
+	if (lkflags & LK_TYPE_MASK)
+		vn_lock(vp, lkflags | LK_RETRY, td);
 
-/*
- * Set a new uppervp.  The passed uppervp must be locked and will be 
- * stored in the vp in a locked state.  The caller should not unlock
- * uppervp.
- */
+	*vpp = vp;
 
-static void
-union_newupper(un, uppervp)
-	struct union_node *un;
-	struct vnode *uppervp;
-{
-	union_updatevp(un, uppervp, un->un_lowervp);
+	return (0);
 }
 
 /*
- * Keep track of size changes in the underlying vnodes.
- * If the size changes, then callback to the vm layer
- * giving priority to the upper layer size.
+ * Clean up the unionfs node.
  */
 void
-union_newsize(vp, uppersz, lowersz)
-	struct vnode *vp;
-	off_t uppersz, lowersz;
+unionfs_noderem(struct vnode *vp, struct thread *td)
 {
-	struct union_node *un;
-	off_t sz;
-
-	/* only interested in regular files */
-	if (vp->v_type != VREG)
-		return;
+	int		vfslocked;
+	struct unionfs_node *unp;
+	struct unionfs_node_status *unsp, *unsp_tmp;
+	struct vnode   *lvp;
+	struct vnode   *uvp;
 
-	un = VTOUNION(vp);
-	sz = VNOVAL;
-
-	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
-		un->un_uppersz = uppersz;
-		if (sz == VNOVAL)
-			sz = un->un_uppersz;
-	}
-
-	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
-		un->un_lowersz = lowersz;
-		if (sz == VNOVAL)
-			sz = un->un_lowersz;
-	}
+	/*
+	 * Use the interlock to protect the clearing of v_data to
+	 * prevent faults in unionfs_lock().
+	 */
+	VI_LOCK(vp);
+	unp = VTOUNIONFS(vp);
+	lvp = unp->un_lowervp;
+	uvp = unp->un_uppervp;
+	unp->un_lowervp = unp->un_uppervp = NULLVP;
+
+	vp->v_vnlock = &(vp->v_lock);
+	vp->v_data = NULL;
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td);
+	if (lvp != NULLVP)
+		VOP_UNLOCK(lvp, 0, td);
+	if (uvp != NULLVP)
+		VOP_UNLOCK(uvp, 0, td);
+	vp->v_object = NULL;
 
-	if (sz != VNOVAL) {
-		UDEBUG(("union: %s size now %ld\n",
-			(uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
-		/*
-		 * There is no need to change size of non-existent object.
-		 */
-		/* vnode_pager_setsize(vp, sz); */
+	if (lvp != NULLVP) {
+		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
+		vrele(lvp);
+		VFS_UNLOCK_GIANT(vfslocked);
+	}
+	if (uvp != NULLVP) {
+		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
+		vrele(uvp);
+		VFS_UNLOCK_GIANT(vfslocked);
+	}
+	if (unp->un_dvp != NULLVP) {
+		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
+		vrele(unp->un_dvp);
+		VFS_UNLOCK_GIANT(vfslocked);
+		unp->un_dvp = NULLVP;
+	}
+	if (unp->un_path) {
+		free(unp->un_path, M_UNIONFSPATH);
+		unp->un_path = NULL;
+	}
+
+	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
+		LIST_REMOVE(unsp, uns_list);
+		free(unsp, M_TEMP);
 	}
+	FREE(unp, M_UNIONFSNODE);
 }
 
 /*
- *	union_allocvp:	allocate a union_node and associate it with a
- *			parent union_node and one or two vnodes.
- *
- *	vpp	Holds the returned vnode locked and referenced if no 
- *		error occurs.
- *
- *	mp	Holds the mount point.  mp may or may not be busied. 
- *		allocvp() makes no changes to mp.
- *
- *	dvp	Holds the parent union_node to the one we wish to create.
- *		XXX may only be used to traverse an uncopied lowervp-based
- *		tree?  XXX
- *
- *		dvp may or may not be locked.  allocvp() makes no changes
- *		to dvp.
- *
- *	upperdvp Holds the parent vnode to uppervp, generally used along
- *		with path component information to create a shadow of
- *		lowervp when uppervp does not exist.
- *
- *		upperdvp is referenced but unlocked on entry, and will be
- *		dereferenced on return.
- *
- *	uppervp	Holds the new uppervp vnode to be stored in the 
- *		union_node we are allocating.  uppervp is referenced but
- *		not locked, and will be dereferenced on return.
- *
- *	lowervp	Holds the new lowervp vnode to be stored in the
- *		union_node we are allocating.  lowervp is referenced but
- *		not locked, and will be dereferenced on return.
- * 
- *	cnp	Holds path component information to be coupled with
- *		lowervp and upperdvp to allow unionfs to create an uppervp
- *		later on.  Only used if lowervp is valid.  The contents
- *		of cnp is only valid for the duration of the call.
- *
- *	docache	Determine whether this node should be entered in the
- *		cache or whether it should be destroyed as soon as possible.
- *
- * All union_nodes are maintained on a singly-linked
- * list.  New nodes are only allocated when they cannot
- * be found on this list.  Entries on the list are
- * removed when the vfs reclaim entry is called.
- *
- * A single lock is kept for the entire list.  This is
- * needed because the getnewvnode() function can block
- * waiting for a vnode to become free, in which case there
- * may be more than one process trying to get the same
- * vnode.  This lock is only taken if we are going to
- * call getnewvnode(), since the kernel itself is single-threaded.
- *
- * If an entry is found on the list, then call vget() to
- * take a reference.  This is done because there may be
- * zero references to it and so it needs to removed from
- * the vnode free list.
+ * Get the unionfs node status for thread td, creating it if none exists.
+ * The caller must hold the exclusive lock on this vnode.
  */
-
-int
-union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
-	struct vnode **vpp;
-	struct mount *mp;
-	struct vnode *dvp;		/* parent union vnode */
-	struct vnode *upperdvp;		/* parent vnode of uppervp */
-	struct componentname *cnp;	/* may be null */
-	struct vnode *uppervp;		/* may be null */
-	struct vnode *lowervp;		/* may be null */
-	int docache;
+void
+unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
+			struct unionfs_node_status **unspp)
 {
-	int error;
-	struct union_node *un = 0;
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	struct thread *td = (cnp) ? cnp->cn_thread : curthread;
-	int hash = 0;
-	int vflag;
-	int try;
-
-	if (uppervp == NULLVP && lowervp == NULLVP)
-		panic("union: unidentifiable allocation");
+	struct unionfs_node_status *unsp;
 
-	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
-		vrele(lowervp);
-		lowervp = NULLVP;
-	}
+	KASSERT(NULL != unspp, ("null pointer"));
+	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
 
-	/* detect the root vnode (and aliases) */
-	vflag = 0;
-	if ((uppervp == um->um_uppervp) &&
-	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
-		if (lowervp == NULLVP) {
-			lowervp = um->um_lowervp;
-			if (lowervp != NULLVP)
-				VREF(lowervp);
+	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
+		if (unsp->uns_tid == td->td_tid) {
+			*unspp = unsp;
+			return;
 		}
-		vflag = VV_ROOT;
 	}
 
-loop:
-	if (!docache) {
-		un = 0;
-	} else for (try = 0; try < 3; try++) {
-		switch (try) {
-		case 0:
-			if (lowervp == NULLVP)
-				continue;
-			hash = UNION_HASH(uppervp, lowervp);
-			break;
+	/* create a new unionfs node status */
+	MALLOC(unsp, struct unionfs_node_status *,
+	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
 
-		case 1:
-			if (uppervp == NULLVP)
-				continue;
-			hash = UNION_HASH(uppervp, NULLVP);
-			break;
-
-		case 2:
-			if (lowervp == NULLVP)
-				continue;
-			hash = UNION_HASH(NULLVP, lowervp);
-			break;
-		}
-
-		while (union_list_lock(hash))
-			continue;
-
-		LIST_FOREACH(un, &unhead[hash], un_cache) {
-			if ((un->un_lowervp == lowervp ||
-			     un->un_lowervp == NULLVP) &&
-			    (un->un_uppervp == uppervp ||
-			     un->un_uppervp == NULLVP) &&
-			    (UNIONTOV(un)->v_mount == mp)) {
-				if (vget(UNIONTOV(un), 0,
-				    cnp ? cnp->cn_thread : NULL)) {
-					union_list_unlock(hash);
-					goto loop;
-				}
-				break;
-			}
-		}
+	unsp->uns_tid = td->td_tid;
+	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
 
-		union_list_unlock(hash);
+	*unspp = unsp;
+}
 
-		if (un)
-			break;
-	}
+/*
+ * Remove and free the node status unless it still has open references.
+ * The caller must hold the exclusive lock on this vnode.
+ */
+void
+unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
+			   struct unionfs_node_status *unsp)
+{
+	KASSERT(NULL != unsp, ("null pointer"));
+	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_tryrem_node_status");
 
-	if (un) {
-		/*
-		 * Obtain a lock on the union_node.  Everything is unlocked
-		 * except for dvp, so check that case.  If they match, our
-		 * new un is already locked.  Otherwise we have to lock our
-		 * new un.
-		 *
-		 * A potential deadlock situation occurs when we are holding
-		 * one lock while trying to get another.  We must follow 
-		 * strict ordering rules to avoid it.  We try to locate dvp
-		 * by scanning up from un_vnode, since the most likely 
-		 * scenario is un being under dvp.
-		 */
+	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
+		return;
 
-		if (dvp && un->un_vnode != dvp) {
-			struct vnode *scan = un->un_vnode;
+	LIST_REMOVE(unsp, uns_list);
+	free(unsp, M_TEMP);
+}
 
-			do {
-				scan = VTOUNION(scan)->un_pvp;
-			} while (scan && scan->v_op == &union_vnodeops &&
-				 scan != dvp);
-			if (scan != dvp) {
-				/*
-				 * our new un is above dvp (we never saw dvp
-				 * while moving up the tree).
-				 */
-				VREF(dvp);
-				VOP_UNLOCK(dvp, 0, td);
-				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
-				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
-				vrele(dvp);
-			} else {
-				/*
-				 * our new un is under dvp
-				 */
-				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
-			}
-		} else if (dvp == NULLVP) {
-			/*
-			 * dvp is NULL, we need to lock un.
-			 */
-			error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
+/*
+ * Create upper node attr.
+ */
+void
+unionfs_create_uppervattr_core(struct unionfs_mount *ump,
+			       struct vattr *lva,
+			       struct vattr *uva,
+			       struct thread *td)
+{
+	VATTR_NULL(uva);
+	uva->va_type = lva->va_type;
+	uva->va_atime = lva->va_atime;
+	uva->va_mtime = lva->va_mtime;
+	uva->va_ctime = lva->va_ctime;
+
+	switch (ump->um_copymode) {
+	case UNIONFS_TRANSPARENT:
+		uva->va_mode = lva->va_mode;
+		uva->va_uid = lva->va_uid;
+		uva->va_gid = lva->va_gid;
+		break;
+	case UNIONFS_MASQUERADE:
+		if (ump->um_uid == lva->va_uid) {
+			uva->va_mode = lva->va_mode & 077077;
+			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
+			uva->va_uid = lva->va_uid;
+			uva->va_gid = lva->va_gid;
 		} else {
-			/*
-			 * dvp == un->un_vnode, we are already locked.
-			 */
-			error = 0;
-		}
-
-		if (error)
-			goto loop;
-
-		/*
-		 * At this point, the union_node is locked and referenced.
-		 *
-		 * uppervp is locked and referenced or NULL, lowervp is
-		 * referenced or NULL.
-		 */
-		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
-			un, un->un_vnode, un->un_uppervp, 
-			(un->un_uppervp ? vrefcnt(un->un_uppervp) : -99),
-			uppervp,
-			(uppervp ? vrefcnt(uppervp) : -99)
-		));
-
-		if (uppervp != un->un_uppervp) {
-			KASSERT(uppervp == NULL || vrefcnt(uppervp) > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", vrefcnt(uppervp)));
-			union_newupper(un, uppervp);
-		} else if (uppervp) {
-			KASSERT(vrefcnt(uppervp) > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", vrefcnt(uppervp)));
-			vrele(uppervp);
-		}
-
-		/*
-		 * Save information about the lower layer.
-		 * This needs to keep track of pathname
-		 * and directory information which union_vn_create()
-		 * might need.
-		 */
-		if (lowervp != un->un_lowervp) {
-			union_newlower(un, lowervp);
-			if (cnp && (lowervp != NULLVP)) {
-				un->un_path = malloc(cnp->cn_namelen+1,
-						M_UNPATH, M_WAITOK);
-				bcopy(cnp->cn_nameptr, un->un_path,
-						cnp->cn_namelen);
-				un->un_path[cnp->cn_namelen] = '\0';
-			}
-		} else if (lowervp) {
-			vrele(lowervp);
+			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
+			uva->va_uid = ump->um_uid;
+			uva->va_gid = ump->um_gid;
 		}
+		break;
+	default:		/* UNIONFS_TRADITIONAL */
+		FILEDESC_SLOCK(td->td_proc->p_fd);
+		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
+		FILEDESC_SUNLOCK(td->td_proc->p_fd);
+		uva->va_uid = ump->um_uid;
+		uva->va_gid = ump->um_gid;
+		break;
+	}
+}
 
-		/*
-		 * and upperdvp
-		 */
-		if (upperdvp != un->un_dirvp) {
-			if (un->un_dirvp)
-				vrele(un->un_dirvp);
-			un->un_dirvp = upperdvp;
-		} else if (upperdvp) {
-			vrele(upperdvp);
-		}
+/*
+ * Create upper node attr.
+ */
+int
+unionfs_create_uppervattr(struct unionfs_mount *ump,
+			  struct vnode *lvp,
+			  struct vattr *uva,
+			  struct ucred *cred,
+			  struct thread *td)
+{
+	int		error;
+	struct vattr	lva;
 
-		*vpp = UNIONTOV(un);
-		return (0);
-	}
+	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
+		return (error);
 
-	if (docache) {
-		/*
-		 * Otherwise lock the vp list while we call getnewvnode()
-		 * since that can block.
-		 */ 
-		hash = UNION_HASH(uppervp, lowervp);
+	unionfs_create_uppervattr_core(ump, &lva, uva, td);
 
-		if (union_list_lock(hash))
-			goto loop;
-	}
+	return (error);
+}
 
-	/*
-	 * Create new node rather than replace old node.
-	 */
+/*
+ * relookup
+ * 
+ * dvp should be locked on entry and will be locked on return.
+ * 
+ * If an error is returned, *vpp will be invalid, otherwise it will hold a
+ * locked, referenced vnode. If *vpp == dvp then remember that only one
+ * LK_EXCLUSIVE lock is held.
+ */
+static int
+unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
+		 struct componentname *cnp, struct componentname *cn,
+		 struct thread *td, char *path, int pathlen, u_long nameiop)
+{
+	int	error;
 
-	error = getnewvnode("union", mp, &union_vnodeops, vpp);
-	if (error) {
-		/*
-		 * If an error occurs, clear out vnodes.
-		 */
-		if (lowervp)
-			vrele(lowervp);
-		if (uppervp) 
-			vrele(uppervp);
-		if (upperdvp)
-			vrele(upperdvp);
-		*vpp = NULL;
-		goto out;
-	}
-
-	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
-		M_TEMP, M_WAITOK);
-
-	(*vpp)->v_vflag |= vflag;
-	if (uppervp)
-		(*vpp)->v_type = uppervp->v_type;
-	else
-		(*vpp)->v_type = lowervp->v_type;
+	cn->cn_namelen = pathlen;
+	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
+	bcopy(path, cn->cn_pnbuf, pathlen);
+	cn->cn_pnbuf[pathlen] = '\0';
 
-	un = VTOUNION(*vpp);
-	bzero(un, sizeof(*un));
+	cn->cn_nameiop = nameiop;
+	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
+	cn->cn_lkflags = LK_EXCLUSIVE;
+	cn->cn_thread = td;
+	cn->cn_cred = cnp->cn_cred;
 
-	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);
+	cn->cn_nameptr = cn->cn_pnbuf;
+	cn->cn_consume = cnp->cn_consume;
 
-	un->un_vnode = *vpp;
-	un->un_uppervp = uppervp;
-	un->un_uppersz = VNOVAL;
-	un->un_lowervp = lowervp;
-	un->un_lowersz = VNOVAL;
-	un->un_dirvp = upperdvp;
-	un->un_pvp = dvp;		/* only parent dir in new allocation */
-	if (dvp != NULLVP)
-		VREF(dvp);
-	un->un_dircache = NULL;
-	un->un_openl = 0;
-
-	if (cnp && (lowervp != NULLVP)) {
-		un->un_path = malloc(cnp->cn_namelen+1, M_UNPATH, M_WAITOK);
-		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
-		un->un_path[cnp->cn_namelen] = '\0';
-	} else {
-		un->un_path = NULL;
-		un->un_dirvp = NULL;
-	}
-
-	if (docache) {
-		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
-		un->un_flags |= UN_CACHED;
-	}
-
-out:
-	if (docache)
-		union_list_unlock(hash);
+	if (nameiop == DELETE)
+		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
+	else if (RENAME == nameiop)
+		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
+
+	vref(dvp);
+	VOP_UNLOCK(dvp, 0, td);
+
+	if ((error = relookup(dvp, vpp, cn))) {
+		uma_zfree(namei_zone, cn->cn_pnbuf);
+		cn->cn_flags &= ~HASBUF;
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
+	} else
+		vrele(dvp);
 
 	return (error);
 }
 
+/*
+ * relookup for CREATE namei operation.
+ *
+ * dvp is unionfs vnode. dvp should be locked.
+ *
+ * If unionfs_link() etc. has called the 'unionfs_copyfile' function,
+ * the VOP_LOOKUP information is no longer valid.
+ * So a relookup is needed in order to create the link etc.
+ */
 int
-union_freevp(vp)
-	struct vnode *vp;
+unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
+			    struct thread *td)
 {
-	struct union_node *un = VTOUNION(vp);
+	int	error;
+	struct vnode *udvp;
+	struct vnode *vp;
+	struct componentname cn;
 
-	if (un->un_flags & UN_CACHED) {
-		un->un_flags &= ~UN_CACHED;
-		LIST_REMOVE(un, un_cache);
-	}
+	udvp = UNIONFSVPTOUPPERVP(dvp);
+	vp = NULLVP;
 
-	if (un->un_pvp != NULLVP) {
-		vrele(un->un_pvp);
-		un->un_pvp = NULL;
-	}
-	if (un->un_uppervp != NULLVP) {
-		vrele(un->un_uppervp);
-		un->un_uppervp = NULL;
-	}
-	if (un->un_lowervp != NULLVP) {
-		vrele(un->un_lowervp);
-		un->un_lowervp = NULL;
-	}
-	if (un->un_dirvp != NULLVP) {
-		vrele(un->un_dirvp);
-		un->un_dirvp = NULL;
+	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
+	    strlen(cnp->cn_nameptr), CREATE);
+	if (error)
+		return (error);
+
+	if (vp != NULLVP) {
+		if (udvp == vp)
+			vrele(vp);
+		else
+			vput(vp);
+
+		error = EEXIST;
 	}
-	if (un->un_path) {
-		free(un->un_path, M_UNPATH);
-		un->un_path = NULL;
+
+	if (cn.cn_flags & HASBUF) {
+		uma_zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_flags &= ~HASBUF;
 	}
 
-	FREE(vp->v_data, M_TEMP);
-	vp->v_data = 0;
-	vp->v_object = NULL;
+	if (!error) {
+		cn.cn_flags |= (cnp->cn_flags & HASBUF);
+		cnp->cn_flags = cn.cn_flags;
+	}
 
-	return (0);
+	return (error);
 }
 
 /*
- * copyfile.  Copy the vnode (fvp) to the vnode (tvp)
- * using a sequence of reads and writes.  Both (fvp)
- * and (tvp) are locked on entry and exit.
+ * relookup for DELETE namei operation.
  *
- * fvp and tvp are both exclusive locked on call, but their refcount's
- * haven't been bumped at all.
+ * dvp is unionfs vnode. dvp should be locked.
  */
-static int
-union_copyfile(fvp, tvp, cred, td)
-	struct vnode *fvp;
-	struct vnode *tvp;
-	struct ucred *cred;
-	struct thread *td;
-{
-	char *buf;
-	struct uio uio;
-	struct iovec iov;
-	int error = 0;
-
-	/*
-	 * strategy:
-	 * Allocate a buffer of size MAXBSIZE.
-	 * Loop doing reads and writes, keeping track
-	 * of the current uio offset.
-	 * Give up at the first sign of trouble.
-	 */
-
-	bzero(&uio, sizeof(uio));
-
-	uio.uio_td = td;
-	uio.uio_segflg = UIO_SYSSPACE;
-	uio.uio_offset = 0;
-
-	VOP_LEASE(fvp, td, cred, LEASE_READ);
-	VOP_LEASE(tvp, td, cred, LEASE_WRITE);
-
-	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
-
-	/* ugly loop follows... */
-	do {
-		off_t offset = uio.uio_offset;
-		int count;
-		int bufoffset;
+int
+unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
+			    struct thread *td)
+{
+	int	error;
+	struct vnode *udvp;
+	struct vnode *vp;
+	struct componentname cn;
 
-		/*
-		 * Setup for big read.
-		 */
-		uio.uio_iov = &iov;
-		uio.uio_iovcnt = 1;
-		iov.iov_base = buf;
-		iov.iov_len = MAXBSIZE;
-		uio.uio_resid = iov.iov_len;
-		uio.uio_rw = UIO_READ;
+	udvp = UNIONFSVPTOUPPERVP(dvp);
+	vp = NULLVP;
 
-		if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
-			break;
+	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
+	    strlen(cnp->cn_nameptr), DELETE);
+	if (error)
+		return (error);
 
-		/*
-		 * Get bytes read, handle read eof case and setup for
-		 * write loop.
-		 */
-		if ((count = MAXBSIZE - uio.uio_resid) == 0)
-			break;
-		bufoffset = 0;
+	if (vp == NULLVP)
+		error = ENOENT;
+	else {
+		if (udvp == vp)
+			vrele(vp);
+		else
+			vput(vp);
+	}
 
-		/*
-		 * Write until an error occurs or our buffer has been
-		 * exhausted, then update the offset for the next read.
-		 */
-		while (bufoffset < count) {
-			uio.uio_iov = &iov;
-			uio.uio_iovcnt = 1;
-			iov.iov_base = buf + bufoffset;
-			iov.iov_len = count - bufoffset;
-			uio.uio_offset = offset + bufoffset;
-			uio.uio_rw = UIO_WRITE;
-			uio.uio_resid = iov.iov_len;
+	if (cn.cn_flags & HASBUF) {
+		uma_zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_flags &= ~HASBUF;
+	}
 
-			if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
-				break;
-			bufoffset += (count - bufoffset) - uio.uio_resid;
-		}
-		uio.uio_offset = offset + bufoffset;
-	} while (error == 0);
+	if (!error) {
+		cn.cn_flags |= (cnp->cn_flags & HASBUF);
+		cnp->cn_flags = cn.cn_flags;
+	}
 
-	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
+ * relookup for RENAME namei operation.
  *
- * un's vnode is assumed to be locked on entry and remains locked on exit.
+ * dvp is unionfs vnode. dvp should be locked.
  */
-
 int
-union_copyup(un, docopy, cred, td)
-	struct union_node *un;
-	int docopy;
-	struct ucred *cred;
-	struct thread *td;
+unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
+			    struct thread *td)
 {
 	int error;
-	struct mount *mp;
-	struct vnode *lvp, *uvp;
+	struct vnode *udvp;
+	struct vnode *vp;
+	struct componentname cn;
 
-	/*
-	 * If the user does not have read permission, the vnode should not
-	 * be copied to upper layer.
-	 */
-	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td);
-	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td);
-	VOP_UNLOCK(un->un_lowervp, 0, td);
+	udvp = UNIONFSVPTOUPPERVP(dvp);
+	vp = NULLVP;
+
+	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
+	    strlen(cnp->cn_nameptr), RENAME);
 	if (error)
 		return (error);
 
-	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
-		return (error);
-	if ((error = union_vn_create(&uvp, un, td)) != 0) {
-		vn_finished_write(mp);
-		return (error);
+	if (vp != NULLVP) {
+		if (udvp == vp)
+			vrele(vp);
+		else
+			vput(vp);
 	}
 
-	lvp = un->un_lowervp;
-
-	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
-	if (docopy) {
-		/*
-		 * XX - should not ignore errors
-		 * from VOP_CLOSE()
-		 */
-		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
-		error = VOP_OPEN(lvp, FREAD, cred, td, -1);
-		if (error == 0) {
-			error = union_copyfile(lvp, uvp, cred, td);
-			VOP_UNLOCK(lvp, 0, td);
-			(void) VOP_CLOSE(lvp, FREAD, cred, td);
-		}
-		if (error == 0)
-			UDEBUG(("union: copied up %s\n", un->un_path));
-
+	if (cn.cn_flags & HASBUF) {
+		uma_zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_flags &= ~HASBUF;
 	}
-	VOP_UNLOCK(uvp, 0, td);
-	vn_finished_write(mp);
-	union_newupper(un, uvp);
-	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
-	union_vn_close(uvp, FWRITE, cred, td);
-	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
-	/*
-	 * Subsequent IOs will go to the top layer, so
-	 * call close on the lower vnode and open on the
-	 * upper vnode to ensure that the filesystem keeps
-	 * its references counts right.  This doesn't do
-	 * the right thing with (cred) and (FREAD) though.
-	 * Ignoring error returns is not right, either.
-	 */
-	if (error == 0) {
-		int i;
 
-		for (i = 0; i < un->un_openl; i++) {
-			(void) VOP_CLOSE(lvp, FREAD, cred, td);
-			(void) VOP_OPEN(uvp, FREAD, cred, td, -1);
-		}
-		un->un_openl = 0;
+	if (!error) {
+		cn.cn_flags |= (cnp->cn_flags & HASBUF);
+		cnp->cn_flags = cn.cn_flags;
 	}
 
 	return (error);
@@ -819,550 +529,563 @@
 }
 
 /*
- *	union_relookup:
- *
- *	dvp should be locked on entry and will be locked on return.  No
- *	net change in the ref count will occur.
- *
- *	If an error is returned, *vpp will be invalid, otherwise it
- *	will hold a locked, referenced vnode.  If *vpp == dvp then
- *	remember that only one exclusive lock is held.
+ * Update the unionfs_node.
+ * 
+ * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
+ * uvp's lock and lower's lock will be unlocked.
  */
-
-static int
-union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
-	struct union_mount *um;
-	struct vnode *dvp;
-	struct vnode **vpp;
-	struct componentname *cnp;
-	struct componentname *cn;
-	char *path;
-	int pathlen;
+static void
+unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
+		    struct thread *td)
 {
-	int error;
-
-	/*
-	 * A new componentname structure must be faked up because
-	 * there is no way to know where the upper level cnp came
-	 * from or what it is being used for.  This must duplicate
-	 * some of the work done by NDINIT(), some of the work done
-	 * by namei(), some of the work done by lookup() and some of
-	 * the work done by VOP_LOOKUP() when given a CREATE flag.
-	 * Conclusion: Horrible.
-	 */
-	cn->cn_namelen = pathlen;
-	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
-	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
-	cn->cn_pnbuf[cn->cn_namelen] = '\0';
-
-	cn->cn_nameiop = CREATE;
-	cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
-	cn->cn_thread = cnp->cn_thread;
-	if (um->um_op == UNMNT_ABOVE)
-		cn->cn_cred = cnp->cn_cred;
-	else
-		cn->cn_cred = um->um_cred;
-	cn->cn_nameptr = cn->cn_pnbuf;
-	cn->cn_consume = cnp->cn_consume;
-
-	VREF(dvp);
-	VOP_UNLOCK(dvp, 0, cnp->cn_thread);
+	int		count, lockcnt;
+	struct vnode   *vp;
+	struct vnode   *lvp;
 
-	/*
-	 * Pass dvp unlocked and referenced on call to relookup().
-	 *
-	 * If an error occurs, dvp will be returned unlocked and dereferenced.
-	 */
-
-	if ((error = relookup(dvp, vpp, cn)) != 0) {
-		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread);
-		return(error);
-	}
+	vp = UNIONFSTOV(unp);
+	lvp = unp->un_lowervp;
 
 	/*
-	 * If no error occurs, dvp will be returned locked with the reference
-	 * left as before, and vpp will be returned referenced and locked.
-	 *
-	 * We want to return with dvp as it was passed to us, so we get
-	 * rid of our reference.
+	 * lock update
 	 */
-	vrele(dvp);
-	return (0);
+	VI_LOCK(vp);
+	unp->un_uppervp = uvp;
+	vp->v_vnlock = uvp->v_vnlock;
+	lockcnt = lvp->v_vnlock->lk_exclusivecount;
+	if (lockcnt <= 0)
+		panic("unionfs: no exclusive lock");
+	VI_UNLOCK(vp);
+	for (count = 1; count < lockcnt; count++)
+		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
 }
 
 /*
- * Create a shadow directory in the upper layer.
- * The new vnode is returned locked.
- *
- * (um) points to the union mount structure for access to the
- * the mounting process's credentials.
- * (dvp) is the directory in which to create the shadow directory,
- * It is locked (but not ref'd) on entry and return.
- * (cnp) is the component name to be created.
- * (vpp) is the returned newly created shadow directory, which
- * is returned locked and ref'd
+ * Create a new shadow dir.
+ * 
+ * udvp should be locked on entry and will be locked on return.
+ * 
+ * If no error is returned, unp will be updated.
  */
 int
-union_mkshadow(um, dvp, cnp, vpp)
-	struct union_mount *um;
-	struct vnode *dvp;
-	struct componentname *cnp;
-	struct vnode **vpp;
-{
-	int error;
-	struct vattr va;
-	struct thread *td = cnp->cn_thread;
+unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
+		    struct unionfs_node *unp, struct componentname *cnp,
+		    struct thread *td)
+{
+	int		error;
+	struct vnode   *lvp;
+	struct vnode   *uvp;
+	struct vattr	va;
+	struct vattr	lva;
 	struct componentname cn;
-	struct mount *mp;
+	struct mount   *mp;
+	struct ucred   *cred;
+	struct ucred   *credbk;
+	struct uidinfo *rootinfo;
 
-	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
-		return (error);
-	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
-			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
-		vn_finished_write(mp);
-		return (error);
-	}
+	if (unp->un_uppervp != NULLVP)
+		return (EEXIST);
 
-	if (*vpp) {
-		if (cn.cn_flags & HASBUF) {
-			uma_zfree(namei_zone, cn.cn_pnbuf);
-			cn.cn_flags &= ~HASBUF;
-		}
-		if (dvp == *vpp)
-			vrele(*vpp);
+	lvp = unp->un_lowervp;
+	uvp = NULLVP;
+	credbk = cnp->cn_cred;
+
+	/* Authority change to root */
+	rootinfo = uifind((uid_t)0);
+	cred = crdup(cnp->cn_cred);
+	chgproccnt(cred->cr_ruidinfo, 1, 0);
+	change_euid(cred, rootinfo);
+	change_ruid(cred, rootinfo);
+	change_svuid(cred, (uid_t)0);
+	uifree(rootinfo);
+	cnp->cn_cred = cred;
+
+	memset(&cn, 0, sizeof(cn));
+
+	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
+		goto unionfs_mkshadowdir_abort;
+
+	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
+		goto unionfs_mkshadowdir_abort;
+	if (uvp != NULLVP) {
+		if (udvp == uvp)
+			vrele(uvp);
 		else
-			vput(*vpp);
+			vput(uvp);
+
+		error = EEXIST;
+		goto unionfs_mkshadowdir_free_out;
+	}
+
+	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
+		goto unionfs_mkshadowdir_free_out;
+	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
 		vn_finished_write(mp);
-		*vpp = NULLVP;
-		return (EEXIST);
+		goto unionfs_mkshadowdir_free_out;
 	}
+	unionfs_create_uppervattr_core(ump, &lva, &va, td);
 
-	/*
-	 * Policy: when creating the shadow directory in the
-	 * upper layer, create it owned by the user who did
-	 * the mount, group from parent directory, and mode
-	 * 777 modified by umask (ie mostly identical to the
-	 * mkdir syscall).  (jsp, kb)
-	 */
+	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
 
-	VATTR_NULL(&va);
-	va.va_type = VDIR;
-	va.va_mode = um->um_cmode;
+	if (!error) {
+		unionfs_node_update(unp, uvp, td);
 
-	/* VOP_LEASE: dvp is locked */
-	VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE);
+		/*
+		 * XXX The bug which cannot set uid/gid was corrected.
+		 * Ignore errors.
+		 */
+		va.va_type = VNON;
+		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
+	}
+	vn_finished_write(mp);
 
-	error = VOP_MKDIR(dvp, vpp, &cn, &va);
+unionfs_mkshadowdir_free_out:
 	if (cn.cn_flags & HASBUF) {
 		uma_zfree(namei_zone, cn.cn_pnbuf);
 		cn.cn_flags &= ~HASBUF;
 	}
-	/*vput(dvp);*/
-	vn_finished_write(mp);
+
+unionfs_mkshadowdir_abort:
+	cnp->cn_cred = credbk;
+	chgproccnt(cred->cr_ruidinfo, -1, 0);
+	crfree(cred);
+
 	return (error);
 }
 
 /*
- * Create a whiteout entry in the upper layer.
- *
- * (um) points to the union mount structure for access to the
- * the mounting process's credentials.
- * (dvp) is the directory in which to create the whiteout.
- * It is locked on entry and return.
- * (cnp) is the component name to be created.
+ * Create a new whiteout.
+ * 
+ * dvp should be locked on entry and will be locked on return.
  */
 int
-union_mkwhiteout(um, dvp, cnp, path)
-	struct union_mount *um;
-	struct vnode *dvp;
-	struct componentname *cnp;
-	char *path;
+unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
+		   struct thread *td, char *path)
 {
-	int error;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *wvp;
+	int		error;
+	struct vnode   *wvp;
 	struct componentname cn;
-	struct mount *mp;
+	struct mount   *mp;
 
-	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
-		return (error);
-	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
-	if (error) {
-		vn_finished_write(mp);
-		return (error);
-	}
+	if (path == NULL)
+		path = cnp->cn_nameptr;
 
-	if (wvp) {
+	wvp = NULLVP;
+	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
+		return (error);
+	if (wvp != NULLVP) {
 		if (cn.cn_flags & HASBUF) {
 			uma_zfree(namei_zone, cn.cn_pnbuf);
 			cn.cn_flags &= ~HASBUF;
 		}
-		if (wvp == dvp)
+		if (dvp == wvp)
 			vrele(wvp);
 		else
 			vput(wvp);
-		vn_finished_write(mp);
+
 		return (EEXIST);
 	}
 
-	/* VOP_LEASE: dvp is locked */
-	VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE);
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
+		goto unionfs_mkwhiteout_free_out;
+	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
+		error = VOP_WHITEOUT(dvp, &cn, CREATE);
+
+	vn_finished_write(mp);
 
-	error = VOP_WHITEOUT(dvp, &cn, CREATE);
+unionfs_mkwhiteout_free_out:
 	if (cn.cn_flags & HASBUF) {
 		uma_zfree(namei_zone, cn.cn_pnbuf);
 		cn.cn_flags &= ~HASBUF;
 	}
-	vn_finished_write(mp);
+
 	return (error);
 }
 
 /*
- * union_vn_create: creates and opens a new shadow file
- * on the upper union layer.  This function is similar
- * in spirit to calling vn_open() but it avoids calling namei().
- * The problem with calling namei() is that a) it locks too many
- * things, and b) it doesn't start at the "right" directory,
- * whereas relookup() is told where to start.
- *
- * On entry, the vnode associated with un is locked.  It remains locked
- * on return.
- *
- * If no error occurs, *vpp contains a locked referenced vnode for your
- * use.  If an error occurs *vpp iis undefined.
+ * Create a new vnode for a new shadow file.
+ * 
+ * If an error is returned, *vpp will be invalid, otherwise it will hold a
+ * locked, referenced and opened vnode.
+ * 
+ * unp is never updated.
  */
 static int
-union_vn_create(vpp, un, td)
-	struct vnode **vpp;
-	struct union_node *un;
-	struct thread *td;
-{
-	struct vnode *vp;
-	struct ucred *cred = td->td_ucred;
-	struct vattr vat;
-	struct vattr *vap = &vat;
-	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
-	int error;
-	int cmode;
+unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
+			   struct unionfs_node *unp, struct vattr *uvap,
+			   struct thread *td)
+{
+	struct unionfs_mount *ump;
+	struct vnode   *vp;
+	struct vnode   *lvp;
+	struct ucred   *cred;
+	struct vattr	lva;
+	int		fmode;
+	int		error;
 	struct componentname cn;
 
-	*vpp = NULLVP;
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
-	cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
+	vp = NULLVP;
+	lvp = unp->un_lowervp;
+	cred = td->td_ucred;
+	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
+	error = 0;
 
-	/*
-	 * Build a new componentname structure (for the same
-	 * reasons outlines in union_mkshadow()).
-	 * The difference here is that the file is owned by
-	 * the current user, rather than by the person who
-	 * did the mount, since the current user needs to be
-	 * able to write the file (that's why it is being
-	 * copied in the first place).
-	 */
-	cn.cn_namelen = strlen(un->un_path);
+	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
+		return (error);
+	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
+
+	if (unp->un_path == NULL)
+		panic("unionfs: un_path is null");
+
+	cn.cn_namelen = strlen(unp->un_path);
 	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
-	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
 	cn.cn_nameiop = CREATE;
-	cn.cn_flags = ISOPEN|LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN;
+	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
+	cn.cn_lkflags = LK_EXCLUSIVE;
 	cn.cn_thread = td;
-	cn.cn_cred = td->td_ucred;
+	cn.cn_cred = cred;
 	cn.cn_nameptr = cn.cn_pnbuf;
 	cn.cn_consume = 0;
 
-	/*
-	 * Pass dvp unlocked and referenced on call to relookup().
-	 *
-	 * If an error occurs, dvp will be returned unlocked and dereferenced.
-	 */
-	VREF(un->un_dirvp);
-	error = relookup(un->un_dirvp, &vp, &cn);
-	if (error)
-		return (error);
+	vref(udvp);
+	if ((error = relookup(udvp, &vp, &cn)) != 0)
+		goto unionfs_vn_create_on_upper_free_out2;
+	vrele(udvp);
 
-	/*
-	 * If no error occurs, dvp will be returned locked with the reference
-	 * left as before, and vpp will be returned referenced and locked.
-	 */
-	if (vp) {
-		vput(un->un_dirvp);
-		if (cn.cn_flags & HASBUF) {
-			uma_zfree(namei_zone, cn.cn_pnbuf);
-			cn.cn_flags &= ~HASBUF;
-		}
-		if (vp == un->un_dirvp)
+	if (vp != NULLVP) {
+		if (vp == udvp)
 			vrele(vp);
 		else
 			vput(vp);
-		return (EEXIST);
+		error = EEXIST;
+		goto unionfs_vn_create_on_upper_free_out1;
 	}
 
-	/*
-	 * Good - there was no race to create the file
-	 * so go ahead and create it.  The permissions
-	 * on the file will be 0666 modified by the
-	 * current user's umask.  Access to the file, while
-	 * it is unioned, will require access to the top *and*
-	 * bottom files.  Access when not unioned will simply
-	 * require access to the top-level file.
-	 * TODO: confirm choice of access permissions.
-	 */
-	VATTR_NULL(vap);
-	vap->va_type = VREG;
-	vap->va_mode = cmode;
-	VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE);
-	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
-	if (cn.cn_flags & HASBUF) {
-		uma_zfree(namei_zone, cn.cn_pnbuf);
-		cn.cn_flags &= ~HASBUF;
-	}
-	vput(un->un_dirvp);
-	if (error)
-		return (error);
+	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
+		goto unionfs_vn_create_on_upper_free_out1;
+
+	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
+		goto unionfs_vn_create_on_upper_free_out1;
 
-	error = VOP_OPEN(vp, fmode, cred, td, -1);
-	if (error) {
+	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
 		vput(vp);
-		return (error);
+		goto unionfs_vn_create_on_upper_free_out1;
 	}
 	vp->v_writecount++;
 	*vpp = vp;
-	return (0);
-}
 
-static int
-union_vn_close(vp, fmode, cred, td)
-	struct vnode *vp;
-	int fmode;
-	struct ucred *cred;
-	struct thread *td;
-{
+unionfs_vn_create_on_upper_free_out1:
+	VOP_UNLOCK(udvp, 0, td);
+
+unionfs_vn_create_on_upper_free_out2:
+	if (cn.cn_flags & HASBUF) {
+		uma_zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_flags &= ~HASBUF;
+	}
 
-	if (fmode & FWRITE)
-		--vp->v_writecount;
-	return (VOP_CLOSE(vp, fmode, cred, td));
+	return (error);
 }
 
 /*
- *	union_removed_upper:
- *
- *	An upper-only file/directory has been removed; un-cache it so
- *	that unionfs vnode gets reclaimed and the last uppervp reference
- *	disappears.
- *
- *	Called with union_node unlocked.
+ * Copy from lvp to uvp.
+ * 
+ * lvp and uvp should be locked and opened on entry and will be locked and
+ * opened on return.
  */
-
-void
-union_removed_upper(un)
-	struct union_node *un;
+static int
+unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
+		      struct ucred *cred, struct thread *td)
 {
-	if (un->un_flags & UN_CACHED) {
-		int hash = UNION_HASH(un->un_uppervp, un->un_lowervp);
+	int		error;
+	off_t		offset;
+	int		count;
+	int		bufoffset;
+	char           *buf;
+	struct uio	uio;
+	struct iovec	iov;
+
+	error = 0;
+	memset(&uio, 0, sizeof(uio));
+
+	uio.uio_td = td;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_offset = 0;
+
+	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
+		return (error);
+	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
+		return (error);
+	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+	while (error == 0) {
+		offset = uio.uio_offset;
+
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		iov.iov_base = buf;
+		iov.iov_len = MAXBSIZE;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = UIO_READ;
+
+		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
+			break;
+		if ((count = MAXBSIZE - uio.uio_resid) == 0)
+			break;
+
+		bufoffset = 0;
+		while (bufoffset < count) {
+			uio.uio_iov = &iov;
+			uio.uio_iovcnt = 1;
+			iov.iov_base = buf + bufoffset;
+			iov.iov_len = count - bufoffset;
+			uio.uio_offset = offset + bufoffset;
+			uio.uio_resid = iov.iov_len;
+			uio.uio_rw = UIO_WRITE;
 
-		while (union_list_lock(hash))
-			continue;
-		un->un_flags &= ~UN_CACHED;
-		LIST_REMOVE(un, un_cache);
-		union_list_unlock(hash);
+			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
+				break;
+
+			bufoffset += (count - bufoffset) - uio.uio_resid;
+		}
+
+		uio.uio_offset = offset + bufoffset;
 	}
+
+	free(buf, M_TEMP);
+
+	return (error);
 }
 
 /*
- * Determine whether a whiteout is needed
- * during a remove/rmdir operation.
+ * Copy file from lower to upper.
+ * 
+ * If the contents need to be copied, set docopy to 1. Otherwise, set docopy
+ * to 0.
+ * 
+ * If no error is returned, unp will be updated.
  */
 int
-union_dowhiteout(un, cred, td)
-	struct union_node *un;
-	struct ucred *cred;
-	struct thread *td;
-{
-	struct vattr va;
-
-	if (un->un_lowervp != NULLVP)
-		return (1);
-
-	if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 &&
-	    (va.va_flags & OPAQUE))
-		return (1);
+unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
+		 struct thread *td)
+{
+	int		error;
+	struct mount   *mp;
+	struct vnode   *udvp;
+	struct vnode   *lvp;
+	struct vnode   *uvp;
+	struct vattr	uva;
+
+	lvp = unp->un_lowervp;
+	uvp = NULLVP;
+
+	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
+		return (EROFS);
+	if (unp->un_dvp == NULLVP)
+		return (EINVAL);
+	if (unp->un_uppervp != NULLVP)
+		return (EEXIST);
+	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
+	if (udvp == NULLVP)
+		return (EROFS);
+	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
+		return (EROFS);
 
-	return (0);
-}
+	error = VOP_ACCESS(lvp, VREAD, cred, td);
+	if (error != 0)
+		return (error);
 
-static void
-union_dircache_r(vp, vppp, cntp)
-	struct vnode *vp;
-	struct vnode ***vppp;
-	int *cntp;
-{
-	struct union_node *un;
+	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
+	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
+	if (error != 0) {
+		vn_finished_write(mp);
+		return (error);
+	}
 
-	if (vp->v_op != &union_vnodeops) {
-		if (vppp) {
-			VREF(vp);
-			*(*vppp)++ = vp;
-			if (--(*cntp) == 0)
-				panic("union: dircache table too small");
-		} else {
-			(*cntp)++;
+	if (docopy != 0) {
+		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
+		if (error == 0) {
+			error = unionfs_copyfile_core(lvp, uvp, cred, td);
+			VOP_CLOSE(lvp, FREAD, cred, td);
 		}
-	} else {
-		un = VTOUNION(vp);
-		if (un->un_uppervp != NULLVP)
-			union_dircache_r(un->un_uppervp, vppp, cntp);
-		if (un->un_lowervp != NULLVP)
-			union_dircache_r(un->un_lowervp, vppp, cntp);
 	}
-}
+	VOP_CLOSE(uvp, FWRITE, cred, td);
+	uvp->v_writecount--;
 
-struct vnode *
-union_dircache_get(vp, td)
-	struct vnode *vp;
-	struct thread *td;
-{
-	int cnt;
-	struct vnode *nvp;
-	struct vnode **vpp;
-	struct vnode **dircache, **newdircache;
-	struct union_node *un;
-	int error;
+	vn_finished_write(mp);
 
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
-	un = VTOUNION(vp);
-	dircache = un->un_dircache;
-	newdircache = NULL;
-
-	nvp = NULLVP;
-
-	if (dircache == NULL) {
-		cnt = 0;
-		union_dircache_r(vp, 0, &cnt);
-		cnt++;
-		newdircache = dircache = malloc(cnt * sizeof(struct vnode *),
-						M_UNDCACHE, M_WAITOK);
-		vpp = dircache;
-		union_dircache_r(vp, &vpp, &cnt);
-		*vpp = NULLVP;
-		vpp = dircache + 1;
-	} else {
-		vpp = dircache;
-		do {
-			if (*vpp++ == un->un_uppervp)
-				break;
-		} while (*vpp != NULLVP);
+	if (error == 0) {
+		/* Reset the attributes. Ignore errors. */
+		uva.va_type = VNON;
+		VOP_SETATTR(uvp, &uva, cred, td);
 	}
 
-	if (*vpp == NULLVP)
-		goto out;
-
-	/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/
-	UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? vrefcnt(*vpp) : -99)));
-	VREF(*vpp);
-	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
-	UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? vrefcnt(*vpp) : -99)));
-	if (error)
-		goto out;
+	unionfs_node_update(unp, uvp, td);
 
-	un->un_dircache = NULL;
-	VTOUNION(nvp)->un_dircache = dircache;
-	newdircache = NULL;
+	return (error);
+}
 
-out:
+/*
+ * Check whether vp can be removed by rmdir, i.e. whether it is empty.
+ *
+ * vp is unionfs vnode.
+ * vp should be locked.
+ */
+int
+unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
+{
+	int		error;
+	int		eofflag;
+	int		lookuperr;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
+	struct vnode   *tvp;
+	struct vattr	va;
+	struct componentname cn;
 	/*
-	 * If we allocated a new dircache and couldn't attach
-	 * it to a new vp, free the resources we allocated.
+	 * The size of buf needs to be larger than DIRBLKSIZ.
 	 */
-	if (newdircache) {
-		for (vpp = newdircache; *vpp != NULLVP; vpp++)
-			vrele(*vpp);
-		free(newdircache, M_UNDCACHE);
-	}
+	char		buf[256 * 6];
+	struct dirent  *dp;
+	struct dirent  *edp;
+	struct uio	uio;
+	struct iovec	iov;
+
+	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
+
+	eofflag = 0;
+	uvp = UNIONFSVPTOUPPERVP(vp);
+	lvp = UNIONFSVPTOLOWERVP(vp);
 
-	VOP_UNLOCK(vp, 0, td);
-	return (nvp);
-}
+	/* check opaque */
+	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
+		return (error);
+	if (va.va_flags & OPAQUE)
+		return (0);
 
-void
-union_dircache_free(struct union_node *un)
-{
-	struct vnode **vpp;
+	/* open vnode */
+#ifdef MAC
+	if ((error = mac_check_vnode_open(cred, vp, VEXEC|VREAD)) != 0)
+		return (error);
+#endif
+	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
+		return (error);
+	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
+		return (error);
 
-	for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
-		vrele(*vpp);
-	free(un->un_dircache, M_UNDCACHE);
-	un->un_dircache = NULL;
-}
+	uio.uio_rw = UIO_READ;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_td = td;
+	uio.uio_offset = 0;
 
-/*
- * Module glue to remove #ifdef UNION from vfs_syscalls.c
- */
-static int
-union_dircheck(struct thread *td, struct vnode **vp, struct file *fp)
-{
-	int error = 0;
+#ifdef MAC
+	error = mac_check_vnode_readdir(td->td_ucred, lvp);
+#endif
+	while (!error && !eofflag) {
+		iov.iov_base = buf;
+		iov.iov_len = sizeof(buf);
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_resid = iov.iov_len;
 
-	if ((*vp)->v_op == &union_vnodeops) {
-		struct vnode *lvp;
+		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
+		if (error)
+			break;
+
+		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
+		for (dp = (struct dirent*)buf; !error && dp < edp;
+		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
+			if (dp->d_type == DT_WHT ||
+			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
+			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
+				continue;
 
-		lvp = union_dircache_get(*vp, td);
-		if (lvp != NULLVP) {
-			struct vattr va;
+			cn.cn_namelen = dp->d_namlen;
+			cn.cn_pnbuf = NULL;
+			cn.cn_nameptr = dp->d_name;
+			cn.cn_nameiop = LOOKUP;
+			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
+			cn.cn_lkflags = LK_EXCLUSIVE;
+			cn.cn_thread = td;
+			cn.cn_cred = cred;
+			cn.cn_consume = 0;
 
 			/*
-			 * If the directory is opaque,
-			 * then don't show lower entries
+			 * check entry in lower.
+			 * Sometimes the readdir function returns
+			 * a wrong entry.
 			 */
-			error = VOP_GETATTR(*vp, &va, fp->f_cred, td);
-			if (va.va_flags & OPAQUE) {
-				vput(lvp);
-				lvp = NULLVP;
-			}
-		}
+			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
+
+			if (!lookuperr)
+				vput(tvp);
+			else
+				continue; /* skip entry */
 
-		if (lvp != NULLVP) {
-			error = VOP_OPEN(lvp, FREAD, fp->f_cred, td, -1);
-			if (error) {
-				vput(lvp);
-				return (error);
-			}
-			VOP_UNLOCK(lvp, 0, td);
-			FILE_LOCK(fp);
-			fp->f_vnode = lvp;
-			fp->f_data = lvp;
-			fp->f_offset = 0;
-			FILE_UNLOCK(fp);
-			error = vn_close(*vp, FREAD, fp->f_cred, td);
-			if (error)
-				return (error);
-			*vp = lvp;
-			return -1;	/* goto unionread */
+			/*
+			 * check entry
+			 * If there is no existing or whiteout entry in upper,
+			 * the directory is not empty.
+			 */
+			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
+			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
+
+			if (!lookuperr)
+				vput(tvp);
+
+			/* ignore existing or whiteout entries */
+			if (!lookuperr ||
+			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
+				continue;
+
+			error = ENOTEMPTY;
 		}
 	}
-	return error;
+
+	/* close vnode */
+	VOP_CLOSE(vp, FREAD, cred, td);
+
+	return (error);
 }
 
-static int
-union_modevent(module_t mod, int type, void *data)
+#ifdef DIAGNOSTIC
+
+struct vnode   *
+unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
 {
-	switch (type) {
-	case MOD_LOAD:
-		union_dircheckp = union_dircheck;
-		break;
-	case MOD_UNLOAD:
-		union_dircheckp = NULL;
-		break;
-	default:
-		return EOPNOTSUPP;
-		break;
-	}
-	return 0;
-}
+	struct unionfs_node *unp;
 
-static moduledata_t union_mod = {
-	"union_dircheck",
-	union_modevent,
-	NULL
-};
+	unp = VTOUNIONFS(vp);
 
-DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);
+#ifdef notyet
+	if (vp->v_op != unionfs_vnodeop_p) {
+		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
+#ifdef KDB
+		kdb_enter("unionfs_checkuppervp: on non-unionfs-node.\n");
+#endif
+		panic("unionfs_checkuppervp");
+	};
+#endif
+	return (unp->un_uppervp);
+}
+
+struct vnode   *
+unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
+{
+	struct unionfs_node *unp;
+
+	unp = VTOUNIONFS(vp);
+
+#ifdef notyet
+	if (vp->v_op != unionfs_vnodeop_p) {
+		printf("unionfs_checklowervp: on non-unionfs-node.\n");
+#ifdef KDB
+		kdb_enter("unionfs_checklowervp: on non-unionfs-node.\n");
+#endif
+		panic("unionfs_checklowervp");
+	};
+#endif
+	return (unp->un_lowervp);
+}
+#endif
Index: union.h
===================================================================
RCS file: /home/cvs/src/sys/fs/unionfs/union.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/fs/unionfs/union.h -L sys/fs/unionfs/union.h -u -r1.1.1.1 -r1.2
--- sys/fs/unionfs/union.h
+++ sys/fs/unionfs/union.h
@@ -1,6 +1,8 @@
 /*-
  * Copyright (c) 1994 The Regents of the University of California.
  * Copyright (c) 1994 Jan-Simon Pendry.
+ * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa at ongs.co.jp>, ONGS Inc.
+ * Copyright (c) 2006 Daichi Goto <daichi at freebsd.org>
  * All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
@@ -31,111 +33,110 @@
  * SUCH DAMAGE.
  *
  *	@(#)union.h	8.9 (Berkeley) 12/10/94
- * $FreeBSD: src/sys/fs/unionfs/union.h,v 1.31 2005/01/06 18:10:42 imp Exp $
+ * $FreeBSD: src/sys/fs/unionfs/union.h,v 1.34.2.2 2007/10/22 05:41:54 daichi Exp $
  */
 
-#define UNMNT_ABOVE	0x0001		/* Target appears above mount point */
-#define UNMNT_BELOW	0x0002		/* Target appears below mount point */
-#define UNMNT_REPLACE	0x0003		/* Target replaces mount point */
-
-struct union_mount {
-	struct vnode	*um_uppervp;	/* UN_ULOCK holds locking state */
-	struct vnode	*um_lowervp;	/* Left unlocked */
-	struct ucred	*um_cred;	/* Credentials of user calling mount */
-	int		um_cmode;	/* cmask from mount process */
-	int		um_op;		/* Operation mode */
-	dev_t		um_upperdev;	/* Upper root node fsid[0]*/
-};
-
 #ifdef _KERNEL
 
-#ifndef DIAGNOSTIC
-#define DIAGNOSTIC
-#endif
+/* method used to copy attributes from the lower to the upper layer */
+typedef enum _unionfs_copymode {
+	UNIONFS_TRADITIONAL = 0,
+	UNIONFS_TRANSPARENT,
+	UNIONFS_MASQUERADE
+} unionfs_copymode;
+
+/* whiteout policy of upper layer */
+typedef enum _unionfs_whitemode {
+       UNIONFS_WHITE_ALWAYS = 0,
+       UNIONFS_WHITE_WHENNEEDED
+} unionfs_whitemode;
+
+struct unionfs_mount {
+	struct vnode   *um_lowervp;	/* VREFed once */
+	struct vnode   *um_uppervp;	/* VREFed once */
+	struct vnode   *um_rootvp;	/* ROOT vnode */
+	unionfs_copymode um_copymode;
+	unionfs_whitemode um_whitemode;
+	uid_t		um_uid;
+	gid_t		um_gid;
+	u_short		um_udir;
+	u_short		um_ufile;
+};
 
-/*
- * DEFDIRMODE is the mode bits used to create a shadow directory.
- */
-#define VRWXMODE (VREAD|VWRITE|VEXEC)
-#define VRWMODE (VREAD|VWRITE)
-#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
-#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
+/* unionfs status list */
+struct unionfs_node_status {
+	LIST_ENTRY(unionfs_node_status) uns_list;	/* Status list */
+	lwpid_t		uns_tid;		/* current thread id */
+	int		uns_node_flag;		/* uns flag */
+	int		uns_lower_opencnt;	/* open count of lower */
+	int		uns_upper_opencnt;	/* open count of upper */
+	int		uns_lower_openmode;	/* open mode of lower */
+	int		uns_readdir_status;	/* read status of readdir */
+};
 
-/*
- * A cache of vnode references	(hangs off v_data)
- */
-struct union_node {
-	LIST_ENTRY(union_node)	un_cache;	/* Hash chain */
-	struct vnode		*un_vnode;	/* Back pointer */
-	struct vnode	        *un_uppervp;	/* overlaying object */
-	struct vnode	        *un_lowervp;	/* underlying object */
-	struct vnode		*un_dirvp;	/* Parent dir of uppervp */
-	struct vnode		*un_pvp;	/* Parent vnode */
-	char			*un_path;	/* saved component name */
-	int			un_openl;	/* # of opens on lowervp */
-	int			un_exclcnt;	/* exclusive count */
-	unsigned int		un_flags;
-	struct vnode		**un_dircache;	/* cached union stack */
-	off_t			un_uppersz;	/* size of upper object */
-	off_t			un_lowersz;	/* size of lower object */
-#ifdef DIAGNOSTIC
-	pid_t			un_pid;
-#endif
+/* union node status flags */
+#define	UNS_OPENL_4_READDIR	0x01	/* open lower layer for readdir */
+
+/* A cache of vnode references */
+struct unionfs_node {
+	struct vnode   *un_lowervp;		/* lower side vnode */
+	struct vnode   *un_uppervp;		/* upper side vnode */
+	struct vnode   *un_dvp;			/* parent unionfs vnode */
+	struct vnode   *un_vnode;		/* Back pointer */
+	LIST_HEAD(, unionfs_node_status) un_unshead;  /* unionfs status head */
+	char           *un_path;		/* path */
+	int		un_flag;		/* unionfs node flag */
 };
 
 /*
- * XXX UN_ULOCK -	indicates that the uppervp is locked
- *
- * UN_CACHED -	node is in the union cache
+ * unionfs node flags
+ * The vnode must be exclusively locked when changing the un_flag variable.
  */
+#define UNIONFS_OPENEXTL	0x01	/* openextattr (lower) */
+#define UNIONFS_OPENEXTU	0x02	/* openextattr (upper) */
 
-/*#define UN_ULOCK	0x04*/	/* Upper node is locked */
-#define UN_CACHED	0x10	/* In union cache */
+#define	MOUNTTOUNIONFSMOUNT(mp) ((struct unionfs_mount *)((mp)->mnt_data))
+#define	VTOUNIONFS(vp) ((struct unionfs_node *)(vp)->v_data)
+#define	UNIONFSTOV(xp) ((xp)->un_vnode)
+
+int unionfs_init(struct vfsconf *vfsp);
+int unionfs_uninit(struct vfsconf *vfsp);
+int unionfs_nodeget(struct mount *mp, struct vnode *uppervp, struct vnode *lowervp, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct thread *td);
+void unionfs_noderem(struct vnode *vp, struct thread *td);
+void unionfs_get_node_status(struct unionfs_node *unp, struct thread *td, struct unionfs_node_status **unspp);
+void unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td, struct unionfs_node_status *unsp);
+
+int unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td);
+int unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred, struct thread *td);
+void unionfs_create_uppervattr_core(struct unionfs_mount *ump, struct vattr *lva, struct vattr *uva, struct thread *td);
+int unionfs_create_uppervattr(struct unionfs_mount *ump, struct vnode *lvp, struct vattr *uva, struct ucred *cred, struct thread *td);
+int unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *duvp, struct unionfs_node *unp, struct componentname *cnp, struct thread *td);
+int unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp, struct thread *td, char *path);
+int unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp, struct thread *td);
+int unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp, struct thread *td);
+int unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp, struct thread *td);
 
-/*
- * Hash table locking flags
- */
+#ifdef DIAGNOSTIC
+struct vnode   *unionfs_checklowervp(struct vnode *vp, char *fil, int lno);
+struct vnode   *unionfs_checkuppervp(struct vnode *vp, char *fil, int lno);
+#define	UNIONFSVPTOLOWERVP(vp) unionfs_checklowervp((vp), __FILE__, __LINE__)
+#define	UNIONFSVPTOUPPERVP(vp) unionfs_checkuppervp((vp), __FILE__, __LINE__)
+#else
+#define	UNIONFSVPTOLOWERVP(vp) (VTOUNIONFS(vp)->un_lowervp)
+#define	UNIONFSVPTOUPPERVP(vp) (VTOUNIONFS(vp)->un_uppervp)
+#endif
 
-#define UNVP_WANT	0x01
-#define UNVP_LOCKED	0x02
+extern struct vop_vector unionfs_vnodeops;
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_UNIONFSNODE);
+MALLOC_DECLARE(M_UNIONFSPATH);
+#endif
 
-extern int union_allocvp(struct vnode **, struct mount *,
-				struct vnode *, 
-				struct vnode *, 
-				struct componentname *, struct vnode *,
-				struct vnode *, int);
-extern int union_freevp(struct vnode *);
-extern struct vnode *union_dircache_get(struct vnode *, struct thread *);
-extern void union_dircache_free(struct union_node *);
-extern int union_copyup(struct union_node *, int, struct ucred *,
-				struct thread *);
-extern int union_dowhiteout(struct union_node *, struct ucred *,
-					struct thread *);
-extern int union_mkshadow(struct union_mount *, struct vnode *,
-				struct componentname *, struct vnode **);
-extern int union_mkwhiteout(struct union_mount *, struct vnode *,
-				struct componentname *, char *);
-extern int union_cn_close(struct vnode *, int, struct ucred *,
-				struct thread *);
-extern void union_removed_upper(struct union_node *un);
-extern struct vnode *union_lowervp(struct vnode *);
-extern void union_newsize(struct vnode *, off_t, off_t);
-
-extern int (*union_dircheckp)(struct thread *, struct vnode **,
-				 struct file *);
-
-#define	MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
-#define	VTOUNION(vp) ((struct union_node *)(vp)->v_data)
-#define	UNIONTOV(un) ((un)->un_vnode)
-#define	LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
-#define	UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
-#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
-
-#define UDEBUG(x)	if (uniondebug) printf x
-#define UDEBUG_ENABLED	1
-
-extern struct vop_vector union_vnodeops;
-extern struct vfsops union_vfsops;
-extern int uniondebug;
+#ifdef UNIONFS_DEBUG
+#define UNIONFSDEBUG(format, args...) printf(format ,## args)
+#else
+#define UNIONFSDEBUG(format, args...)
+#endif				/* UNIONFS_DEBUG */
 
-#endif /* _KERNEL */
+#endif				/* _KERNEL */


More information about the Midnightbsd-cvs mailing list