[Midnightbsd-cvs] src [9938] trunk/sys/net: sync with freebsd

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Fri May 25 16:05:59 EDT 2018


Revision: 9938
          http://svnweb.midnightbsd.org/src/?rev=9938
Author:   laffer1
Date:     2018-05-25 16:05:59 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd

Modified Paths:
--------------
    trunk/sys/net/bpf.c
    trunk/sys/net/bpf.h
    trunk/sys/net/bpf_buffer.c
    trunk/sys/net/bpf_buffer.h
    trunk/sys/net/bpf_filter.c
    trunk/sys/net/bpf_jitter.c
    trunk/sys/net/bpf_jitter.h
    trunk/sys/net/bpf_zerocopy.c
    trunk/sys/net/bpf_zerocopy.h
    trunk/sys/net/bpfdesc.h
    trunk/sys/net/bridgestp.c
    trunk/sys/net/bridgestp.h
    trunk/sys/net/ethernet.h
    trunk/sys/net/fddi.h
    trunk/sys/net/firewire.h
    trunk/sys/net/flowtable.c
    trunk/sys/net/flowtable.h
    trunk/sys/net/ieee8023ad_lacp.c
    trunk/sys/net/ieee8023ad_lacp.h
    trunk/sys/net/if.c
    trunk/sys/net/if.h
    trunk/sys/net/if_arc.h
    trunk/sys/net/if_arcsubr.c
    trunk/sys/net/if_arp.h
    trunk/sys/net/if_atm.h
    trunk/sys/net/if_atmsubr.c
    trunk/sys/net/if_bridge.c
    trunk/sys/net/if_bridgevar.h
    trunk/sys/net/if_clone.c
    trunk/sys/net/if_clone.h
    trunk/sys/net/if_dead.c
    trunk/sys/net/if_debug.c
    trunk/sys/net/if_disc.c
    trunk/sys/net/if_dl.h
    trunk/sys/net/if_edsc.c
    trunk/sys/net/if_ef.c
    trunk/sys/net/if_enc.c
    trunk/sys/net/if_enc.h
    trunk/sys/net/if_epair.c
    trunk/sys/net/if_ethersubr.c
    trunk/sys/net/if_faith.c
    trunk/sys/net/if_fddisubr.c
    trunk/sys/net/if_fwsubr.c
    trunk/sys/net/if_gif.c
    trunk/sys/net/if_gif.h
    trunk/sys/net/if_gre.c
    trunk/sys/net/if_gre.h
    trunk/sys/net/if_iso88025subr.c
    trunk/sys/net/if_lagg.c
    trunk/sys/net/if_lagg.h
    trunk/sys/net/if_llatbl.c
    trunk/sys/net/if_llatbl.h
    trunk/sys/net/if_llc.h
    trunk/sys/net/if_loop.c
    trunk/sys/net/if_media.c
    trunk/sys/net/if_media.h
    trunk/sys/net/if_mib.c
    trunk/sys/net/if_mib.h
    trunk/sys/net/if_sppp.h
    trunk/sys/net/if_spppfr.c
    trunk/sys/net/if_spppsubr.c
    trunk/sys/net/if_stf.c
    trunk/sys/net/if_stf.h
    trunk/sys/net/if_tap.c
    trunk/sys/net/if_tap.h
    trunk/sys/net/if_tapvar.h
    trunk/sys/net/if_tun.c
    trunk/sys/net/if_tun.h
    trunk/sys/net/if_types.h
    trunk/sys/net/if_var.h
    trunk/sys/net/if_vlan.c
    trunk/sys/net/if_vlan_var.h
    trunk/sys/net/iso88025.h
    trunk/sys/net/netisr.c
    trunk/sys/net/netisr.h
    trunk/sys/net/netisr_internal.h
    trunk/sys/net/netmap.h
    trunk/sys/net/netmap_user.h
    trunk/sys/net/pfil.c
    trunk/sys/net/pfil.h
    trunk/sys/net/pfkeyv2.h
    trunk/sys/net/ppp_defs.h
    trunk/sys/net/radix.c
    trunk/sys/net/radix.h
    trunk/sys/net/radix_mpath.c
    trunk/sys/net/radix_mpath.h
    trunk/sys/net/raw_cb.c
    trunk/sys/net/raw_cb.h
    trunk/sys/net/raw_usrreq.c
    trunk/sys/net/route.c
    trunk/sys/net/route.h
    trunk/sys/net/rtsock.c
    trunk/sys/net/slcompress.c
    trunk/sys/net/slcompress.h
    trunk/sys/net/vnet.c
    trunk/sys/net/vnet.h
    trunk/sys/net/zlib.c
    trunk/sys/net/zlib.h
    trunk/sys/net/zutil.h

Added Paths:
-----------
    trunk/sys/net/if_me.c
    trunk/sys/net/if_pflog.h
    trunk/sys/net/if_pfsync.h
    trunk/sys/net/if_vxlan.c
    trunk/sys/net/if_vxlan.h
    trunk/sys/net/mppc.h
    trunk/sys/net/mppcc.c
    trunk/sys/net/mppcd.c
    trunk/sys/net/paravirt.h
    trunk/sys/net/pfvar.h
    trunk/sys/net/rndis.h
    trunk/sys/net/sff8436.h
    trunk/sys/net/sff8472.h

Modified: trunk/sys/net/bpf.c
===================================================================
--- trunk/sys/net/bpf.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf.c 315625 2017-03-20 08:16:05Z ae $");
 
 #include "opt_bpf.h"
 #include "opt_compat.h"
@@ -142,7 +142,7 @@
  * structures registered by different layers in the stack (i.e., 802.11
  * frames, ethernet frames, etc).
  */
-static LIST_HEAD(, bpf_if)	bpf_iflist;
+static LIST_HEAD(, bpf_if)	bpf_iflist, bpf_freelist;
 static struct mtx	bpf_mtx;		/* bpf global lock */
 static int		bpf_bpfd_cnt;
 
@@ -523,32 +523,15 @@
 	}
 
 	len = uio->uio_resid;
-
-	if (len - hlen > ifp->if_mtu)
+	if (len < hlen || len - hlen > ifp->if_mtu)
 		return (EMSGSIZE);
 
-	if ((unsigned)len > MJUM16BYTES)
+	m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
+	if (m == NULL)
 		return (EIO);
-
-	if (len <= MHLEN)
-		MGETHDR(m, M_WAIT, MT_DATA);
-	else if (len <= MCLBYTES)
-		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
-	else
-		m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
-#if (MJUMPAGESIZE > MCLBYTES)
-		    len <= MJUMPAGESIZE ? MJUMPAGESIZE :
-#endif
-		    (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
 	m->m_pkthdr.len = m->m_len = len;
-	m->m_pkthdr.rcvif = NULL;
 	*mp = m;
 
-	if (m->m_len < hlen) {
-		error = EPERM;
-		goto bad;
-	}
-
 	error = uiomove(mtod(m, u_char *), len, uio);
 	if (error)
 		goto bad;
@@ -618,13 +601,13 @@
 	 * Save sysctl value to protect from sysctl change
 	 * between reads
 	 */
-	op_w = V_bpf_optimize_writers;
+	op_w = V_bpf_optimize_writers || d->bd_writer;
 
 	if (d->bd_bif != NULL)
 		bpf_detachd_locked(d);
 	/*
 	 * Point d at bp, and add d to the interface's list.
-	 * Since there are many applicaiotns using BPF for
+	 * Since there are many applications using BPF for
 	 * sending raw packets only (dhcpd, cdpd are good examples)
 	 * we can delay adding d to the list of active listeners until
 	 * some filter is configured.
@@ -661,7 +644,7 @@
 
 /*
  * Add d to the list of active bp filters.
- * Reuqires bpf_attachd() to be called before
+ * Requires bpf_attachd() to be called before.
  */
 static void
 bpf_upgraded(struct bpf_d *d)
@@ -805,7 +788,7 @@
 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct bpf_d *d;
-	int error, size;
+	int error;
 
 	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 	error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -820,6 +803,9 @@
 	 * particular buffer method.
 	 */
 	bpf_buffer_init(d);
+	if ((flags & FREAD) == 0)
+		d->bd_writer = 2;
+	d->bd_hbuf_in_use = 0;
 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
 	d->bd_sig = SIGIO;
 	d->bd_direction = BPF_D_INOUT;
@@ -832,10 +818,6 @@
 	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
 	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
 
-	/* Allocate default buffers */
-	size = d->bd_bufsize;
-	bpf_buffer_ioctl_sblen(d, &size);
-
 	return (0);
 }
 
@@ -873,6 +855,14 @@
 		callout_stop(&d->bd_callout);
 	timed_out = (d->bd_state == BPF_TIMED_OUT);
 	d->bd_state = BPF_IDLE;
+	while (d->bd_hbuf_in_use) {
+		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+		    PRINET|PCATCH, "bd_hbuf", 0);
+		if (error != 0) {
+			BPFD_UNLOCK(d);
+			return (error);
+		}
+	}
 	/*
 	 * If the hold buffer is empty, then do a timed sleep, which
 	 * ends when the timeout expires or when enough packets
@@ -941,6 +931,7 @@
 	/*
 	 * At this point, we know we have something in the hold slot.
 	 */
+	d->bd_hbuf_in_use = 1;
 	BPFD_UNLOCK(d);
 
 	/*
@@ -947,18 +938,20 @@
 	 * Move data from hold buffer into user space.
 	 * We know the entire buffer is transferred since
 	 * we checked above that the read buffer is bpf_bufsize bytes.
-	 *
-	 * XXXRW: More synchronization needed here: what if a second thread
-	 * issues a read on the same fd at the same time?  Don't want this
-	 * getting invalidated.
+  	 *
+	 * We do not have to worry about simultaneous reads because
+	 * we waited for sole access to the hold buffer above.
 	 */
 	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
 
 	BPFD_LOCK(d);
+	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
 	d->bd_fbuf = d->bd_hbuf;
 	d->bd_hbuf = NULL;
 	d->bd_hlen = 0;
 	bpf_buf_reclaimed(d);
+	d->bd_hbuf_in_use = 0;
+	wakeup(&d->bd_hbuf_in_use);
 	BPFD_UNLOCK(d);
 
 	return (error);
@@ -1062,7 +1055,7 @@
 		dst.sa_family = pseudo_AF_HDRCMPLT;
 
 	if (d->bd_feedback) {
-		mc = m_dup(m, M_DONTWAIT);
+		mc = m_dup(m, M_NOWAIT);
 		if (mc != NULL)
 			mc->m_pkthdr.rcvif = ifp;
 		/* Set M_PROMISC for outgoing packets to be discarded. */
@@ -1112,6 +1105,9 @@
 
 	BPFD_LOCK_ASSERT(d);
 
+	while (d->bd_hbuf_in_use)
+		mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
+		    "bd_hbuf", 0);
 	if ((d->bd_hbuf != NULL) &&
 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
 		/* Free the hold buffer. */
@@ -1195,7 +1191,7 @@
 #endif
 		case BIOCGETIF:
 		case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
 		case BIOCGRTIMEOUT32:
 #endif
 		case BIOCGSTATS:
@@ -1207,7 +1203,7 @@
 		case FIONREAD:
 		case BIOCLOCK:
 		case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
 		case BIOCSRTIMEOUT32:
 #endif
 		case BIOCIMMEDIATE:
@@ -1252,6 +1248,9 @@
 
 			BPFD_LOCK(d);
 			n = d->bd_slen;
+			while (d->bd_hbuf_in_use)
+				mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+				    PRINET, "bd_hbuf", 0);
 			if (d->bd_hbuf)
 				n += d->bd_hlen;
 			BPFD_UNLOCK(d);
@@ -1409,21 +1408,44 @@
 	 * Set interface.
 	 */
 	case BIOCSETIF:
-		BPF_LOCK();
-		error = bpf_setif(d, (struct ifreq *)addr);
-		BPF_UNLOCK();
-		break;
+		{
+			int alloc_buf, size;
 
+			/*
+			 * Behavior here depends on the buffering model.  If
+			 * we're using kernel memory buffers, then we can
+			 * allocate them here.  If we're using zero-copy,
+			 * then the user process must have registered buffers
+			 * by the time we get here.
+			 */
+			alloc_buf = 0;
+			BPFD_LOCK(d);
+			if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
+			    d->bd_sbuf == NULL)
+				alloc_buf = 1;
+			BPFD_UNLOCK(d);
+			if (alloc_buf) {
+				size = d->bd_bufsize;
+				error = bpf_buffer_ioctl_sblen(d, &size);
+				if (error != 0)
+					break;
+			}
+			BPF_LOCK();
+			error = bpf_setif(d, (struct ifreq *)addr);
+			BPF_UNLOCK();
+			break;
+		}
+
 	/*
 	 * Set read timeout.
 	 */
 	case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
 	case BIOCSRTIMEOUT32:
 #endif
 		{
 			struct timeval *tv = (struct timeval *)addr;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
 			struct timeval32 *tv32;
 			struct timeval tv64;
 
@@ -1449,12 +1471,12 @@
 	 * Get read timeout.
 	 */
 	case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
 	case BIOCGRTIMEOUT32:
 #endif
 		{
 			struct timeval *tv;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
 			struct timeval32 *tv32;
 			struct timeval tv64;
 
@@ -1466,7 +1488,7 @@
 
 			tv->tv_sec = d->bd_rtout / hz;
 			tv->tv_usec = (d->bd_rtout % hz) * tick;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
 			if (cmd == BIOCGRTIMEOUT32) {
 				tv32 = (struct timeval32 *)addr;
 				tv32->tv_sec = tv->tv_sec;
@@ -1859,10 +1881,8 @@
 	BPFIF_RUNLOCK(bp);
 
 	/*
-	 * Behavior here depends on the buffering model.  If we're using
-	 * kernel memory buffers, then we can allocate them here.  If we're
-	 * using zero-copy, then the user process must have registered
-	 * buffers by the time we get here.  If not, return an error.
+	 * At this point, we expect the buffer is already allocated.  If not,
+	 * return an error.
 	 */
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
@@ -1965,7 +1985,10 @@
 	ready = bpf_ready(d);
 	if (ready) {
 		kn->kn_data = d->bd_slen;
-		if (d->bd_hbuf)
+		/*
+		 * Ignore the hold buffer if it is being copied to user space.
+		 */
+		if (!d->bd_hbuf_in_use && d->bd_hbuf)
 			kn->kn_data += d->bd_hlen;
 	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
 		callout_reset(&d->bd_callout, d->bd_rtout,
@@ -2339,6 +2362,7 @@
 			++d->bd_dcount;
 			return;
 		}
+		KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
 		ROTATE_BUFFERS(d);
 		do_wakeup = 1;
 		curlen = 0;
@@ -2477,7 +2501,7 @@
 
 	bp->bif_hdrlen = hdrlen;
 
-	if (bootverbose)
+	if (bootverbose && IS_DEFAULT_VNET(curvnet))
 		if_printf(ifp, "bpf attached\n");
 }
 
@@ -2489,52 +2513,51 @@
 void
 bpfdetach(struct ifnet *ifp)
 {
-	struct bpf_if	*bp;
+	struct bpf_if	*bp, *bp_temp;
 	struct bpf_d	*d;
-#ifdef INVARIANTS
 	int ndetached;
 
 	ndetached = 0;
-#endif
 
 	BPF_LOCK();
 	/* Find all bpf_if struct's which reference ifp and detach them. */
-	do {
-		LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-			if (ifp == bp->bif_ifp)
-				break;
-		}
-		if (bp != NULL)
-			LIST_REMOVE(bp, bif_next);
+	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
+		if (ifp != bp->bif_ifp)
+			continue;
 
-		if (bp != NULL) {
-#ifdef INVARIANTS
-			ndetached++;
-#endif
-			while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
-				bpf_detachd_locked(d);
-				BPFD_LOCK(d);
-				bpf_wakeup(d);
-				BPFD_UNLOCK(d);
-			}
-			/* Free writer-only descriptors */
-			while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
-				bpf_detachd_locked(d);
-				BPFD_LOCK(d);
-				bpf_wakeup(d);
-				BPFD_UNLOCK(d);
-			}
+		LIST_REMOVE(bp, bif_next);
+		/* Add to to-be-freed list */
+		LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
 
-			/*
-			 * Delay freing bp till interface is detached
-			 * and all routes through this interface are removed.
-			 * Mark bp as detached to restrict new consumers.
-			 */
-			BPFIF_WLOCK(bp);
-			bp->flags |= BPFIF_FLAG_DYING;
-			BPFIF_WUNLOCK(bp);
+		ndetached++;
+		/*
+		 * Delay freeing bp till interface is detached
+		 * and all routes through this interface are removed.
+		 * Mark bp as detached to restrict new consumers.
+		 */
+		BPFIF_WLOCK(bp);
+		bp->flags |= BPFIF_FLAG_DYING;
+		BPFIF_WUNLOCK(bp);
+
+		CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
+		    __func__, bp->bif_dlt, bp, ifp);
+
+		/* Free common descriptors */
+		while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+			bpf_detachd_locked(d);
+			BPFD_LOCK(d);
+			bpf_wakeup(d);
+			BPFD_UNLOCK(d);
 		}
-	} while (bp != NULL);
+
+		/* Free writer-only descriptors */
+		while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+			bpf_detachd_locked(d);
+			BPFD_LOCK(d);
+			bpf_wakeup(d);
+			BPFD_UNLOCK(d);
+		}
+	}
 	BPF_UNLOCK();
 
 #ifdef INVARIANTS
@@ -2546,32 +2569,50 @@
 /*
  * Interface departure handler.
  * Note departure event does not guarantee interface is going down.
+ * Interface renaming is currently done via departure/arrival event set.
+ *
+ * Departure handled is called after all routes pointing to
+ * given interface are removed and interface is in down state
+ * restricting any packets to be sent/received. We assume it is now safe
+ * to free data allocated by BPF.
  */
 static void
 bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
 {
-	struct bpf_if *bp;
+	struct bpf_if *bp, *bp_temp;
+	int nmatched = 0;
 
-	BPF_LOCK();
-	if ((bp = ifp->if_bpf) == NULL) {
-		BPF_UNLOCK();
+	/* Ignore ifnet renaming. */
+	if (ifp->if_flags & IFF_RENAMING)
 		return;
-	}
 
-	/* Check if bpfdetach() was called previously */
-	if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
-		BPF_UNLOCK();
-		return;
-	}
+	BPF_LOCK();
+	/*
+	 * Find matching entries in free list.
+	 * Nothing should be found if bpfdetach() was not called.
+	 */
+	LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
+		if (ifp != bp->bif_ifp)
+			continue;
 
-	CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
-	    __func__, bp, ifp);
+		CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
+		    __func__, bp, ifp);
 
-	ifp->if_bpf = NULL;
+		LIST_REMOVE(bp, bif_next);
+
+		rw_destroy(&bp->bif_lock);
+		free(bp, M_BPF);
+
+		nmatched++;
+	}
 	BPF_UNLOCK();
 
-	rw_destroy(&bp->bif_lock);
-	free(bp, M_BPF);
+	/*
+	 * Note that we cannot zero other pointers to
+	 * custom DLTs possibly used by given interface.
+	 */
+	if (nmatched != 0)
+		ifp->if_bpf = NULL;
 }
 
 /*
@@ -2580,26 +2621,44 @@
 static int
 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 {
-	int n, error;
 	struct ifnet *ifp;
 	struct bpf_if *bp;
+	u_int *lst;
+	int error, n, n1;
 
 	BPF_LOCK_ASSERT();
 
 	ifp = d->bd_bif->bif_ifp;
+again:
+	n1 = 0;
+	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+		if (bp->bif_ifp == ifp)
+			n1++;
+	}
+	if (bfl->bfl_list == NULL) {
+		bfl->bfl_len = n1;
+		return (0);
+	}
+	if (n1 > bfl->bfl_len)
+		return (ENOMEM);
+	BPF_UNLOCK();
+	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
 	n = 0;
-	error = 0;
+	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp != ifp)
 			continue;
-		if (bfl->bfl_list != NULL) {
-			if (n >= bfl->bfl_len)
-				return (ENOMEM);
-			error = copyout(&bp->bif_dlt,
-			    bfl->bfl_list + n, sizeof(u_int));
+		if (n >= n1) {
+			free(lst, M_TEMP);
+			goto again;
 		}
+		lst[n] = bp->bif_dlt;
 		n++;
 	}
+	BPF_UNLOCK();
+	error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
+	free(lst, M_TEMP);
+	BPF_LOCK();
 	bfl->bfl_len = n;
 	return (error);
 }
@@ -2651,6 +2710,7 @@
 
 	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
 	LIST_INIT(&bpf_iflist);
+	LIST_INIT(&bpf_freelist);
 
 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
 	/* For compatibility */
@@ -2732,7 +2792,8 @@
 static int
 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 {
-	struct xbpf_d *xbdbuf, *xbd, zerostats;
+	static const struct xbpf_d zerostats;
+	struct xbpf_d *xbdbuf, *xbd, tempstats;
 	int index, error;
 	struct bpf_if *bp;
 	struct bpf_d *bd;
@@ -2752,11 +2813,13 @@
 	 * as we aren't allowing the user to set the counters currently.
 	 */
 	if (req->newptr != NULL) {
-		if (req->newlen != sizeof(zerostats))
+		if (req->newlen != sizeof(tempstats))
 			return (EINVAL);
-		bzero(&zerostats, sizeof(zerostats));
-		xbd = req->newptr;
-		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
+		memset(&tempstats, 0, sizeof(tempstats));
+		error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
+		if (error)
+			return (error);
+		if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
 			return (EINVAL);
 		bpf_zero_counters();
 		return (0);

Modified: trunk/sys/net/bpf.h
===================================================================
--- trunk/sys/net/bpf.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
  *      @(#)bpf.h	8.1 (Berkeley) 6/10/93
  *	@(#)bpf.h	1.34 (LBL)     6/16/96
  *
- * $FreeBSD: stable/9/sys/net/bpf.h 247629 2013-03-02 15:11:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/bpf.h 286856 2015-08-17 18:43:39Z loos $
  */
 
 #ifndef _NET_BPF_H_
@@ -1235,8 +1235,9 @@
 
 /*
  * Rotate the packet buffers in descriptor d.  Move the store buffer into the
- * hold slot, and the free buffer ino the store slot.  Zero the length of the
- * new store buffer.  Descriptor lock should be held.
+ * hold slot, and the free buffer into the store slot.  Zero the length of the
+ * new store buffer.  Descriptor lock should be held.  One must be careful to
+ * not rotate the buffers twice, i.e. if fbuf != NULL.
  */
 #define	ROTATE_BUFFERS(d)	do {					\
 	(d)->bd_hbuf = (d)->bd_sbuf;					\

Modified: trunk/sys/net/bpf_buffer.c
===================================================================
--- trunk/sys/net/bpf_buffer.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_buffer.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -63,7 +63,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_buffer.c 247629 2013-03-02 15:11:20Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_buffer.c 286856 2015-08-17 18:43:39Z loos $");
 
 #include "opt_bpf.h"
 

Modified: trunk/sys/net/bpf_buffer.h
===================================================================
--- trunk/sys/net/bpf_buffer.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_buffer.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/bpf_buffer.h 247629 2013-03-02 15:11:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/bpf_buffer.h 235746 2012-05-21 22:19:19Z melifaro $
  */
 
 #ifndef _NET_BPF_BUFFER_H_

Modified: trunk/sys/net/bpf_filter.c
===================================================================
--- trunk/sys/net/bpf_filter.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_filter.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,10 +36,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_filter.c 224044 2011-07-14 21:06:22Z mp $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_filter.c 264457 2014-04-14 13:30:08Z jmmv $");
 
 #include <sys/param.h>
 
+#if !defined(_KERNEL)
+#include <strings.h>
+#endif
 #if !defined(_KERNEL) || defined(sun)
 #include <netinet/in.h>
 #endif

Modified: trunk/sys/net/bpf_jitter.c
===================================================================
--- trunk/sys/net/bpf_jitter.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_jitter.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_jitter.c 199615 2009-11-20 21:12:40Z jkim $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_jitter.c 199615 2009-11-20 21:12:40Z jkim $");
 
 #ifdef _KERNEL
 #include "opt_bpf.h"

Modified: trunk/sys/net/bpf_jitter.h
===================================================================
--- trunk/sys/net/bpf_jitter.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_jitter.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/bpf_jitter.h 199603 2009-11-20 18:49:20Z jkim $
+ * $FreeBSD: stable/10/sys/net/bpf_jitter.h 199603 2009-11-20 18:49:20Z jkim $
  */
 
 #ifndef _NET_BPF_JITTER_H_

Modified: trunk/sys/net/bpf_zerocopy.c
===================================================================
--- trunk/sys/net/bpf_zerocopy.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_zerocopy.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_zerocopy.c 240238 2012-09-08 16:40:18Z kib $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_zerocopy.c 239065 2012-08-05 14:11:42Z kib $");
 
 #include "opt_bpf.h"
 

Modified: trunk/sys/net/bpf_zerocopy.h
===================================================================
--- trunk/sys/net/bpf_zerocopy.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_zerocopy.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/bpf_zerocopy.h 234969 2012-05-03 16:48:48Z eadler $
+ * $FreeBSD: stable/10/sys/net/bpf_zerocopy.h 230108 2012-01-14 17:07:52Z eadler $
  */
 
 #ifndef _NET_BPF_ZEROCOPY_H_

Modified: trunk/sys/net/bpfdesc.h
===================================================================
--- trunk/sys/net/bpfdesc.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpfdesc.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -34,7 +34,7 @@
  *
  *      @(#)bpfdesc.h	8.1 (Berkeley) 6/10/93
  *
- * $FreeBSD: stable/9/sys/net/bpfdesc.h 247629 2013-03-02 15:11:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/bpfdesc.h 244090 2012-12-10 16:14:44Z ghelmer $
  */
 
 #ifndef _NET_BPFDESC_H_
@@ -64,6 +64,7 @@
 	caddr_t		bd_sbuf;	/* store slot */
 	caddr_t		bd_hbuf;	/* hold slot */
 	caddr_t		bd_fbuf;	/* free slot */
+	int		bd_hbuf_in_use;	/* don't rotate buffers */
 	int 		bd_slen;	/* current length of store buffer */
 	int 		bd_hlen;	/* current length of hold buffer */
 

Modified: trunk/sys/net/bridgestp.c
===================================================================
--- trunk/sys/net/bridgestp.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bridgestp.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bridgestp.c 236052 2012-05-26 07:43:17Z thompsa $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bridgestp.c 248324 2013-03-15 12:55:30Z glebius $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -235,7 +235,7 @@
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return;
 
@@ -349,7 +349,7 @@
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return;
 

Modified: trunk/sys/net/bridgestp.h
===================================================================
--- trunk/sys/net/bridgestp.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bridgestp.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -68,7 +68,7 @@
  *
  * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
  *
- * $FreeBSD: stable/9/sys/net/bridgestp.h 236052 2012-05-26 07:43:17Z thompsa $
+ * $FreeBSD: stable/10/sys/net/bridgestp.h 234488 2012-04-20 10:06:28Z thompsa $
  */
 
 /*

Modified: trunk/sys/net/ethernet.h
===================================================================
--- trunk/sys/net/ethernet.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ethernet.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -2,7 +2,7 @@
 /*
  * Fundamental constants relating to ethernet.
  *
- * $FreeBSD: stable/9/sys/net/ethernet.h 191148 2009-04-16 20:30:28Z kmacy $
+ * $FreeBSD: stable/10/sys/net/ethernet.h 321752 2017-07-31 03:49:08Z sephe $
  *
  */
 
@@ -315,6 +315,7 @@
 #define	ETHERTYPE_SLOW		0x8809	/* 802.3ad link aggregation (LACP) */
 #define	ETHERTYPE_PPP		0x880B	/* PPP (obsolete by PPPoE) */
 #define	ETHERTYPE_HITACHI	0x8820	/* Hitachi Cable (Optoelectronic Systems Laboratory) */
+#define ETHERTYPE_TEST		0x8822  /* Network Conformance Testing */
 #define	ETHERTYPE_MPLS		0x8847	/* MPLS Unicast */
 #define	ETHERTYPE_MPLS_MCAST	0x8848	/* MPLS Multicast */
 #define	ETHERTYPE_AXIS		0x8856	/* Axis Communications AB proprietary bootstrap/config */
@@ -376,8 +377,8 @@
 extern	void ether_ifattach(struct ifnet *, const u_int8_t *);
 extern	void ether_ifdetach(struct ifnet *);
 extern	int  ether_ioctl(struct ifnet *, u_long, caddr_t);
-extern	int  ether_output(struct ifnet *,
-		   struct mbuf *, struct sockaddr *, struct route *);
+extern	int  ether_output(struct ifnet *, struct mbuf *,
+	    const struct sockaddr *, struct route *);
 extern	int  ether_output_frame(struct ifnet *, struct mbuf *);
 extern	char *ether_sprintf(const u_int8_t *);
 void	ether_vlan_mtap(struct bpf_if *, struct mbuf *,
@@ -384,6 +385,12 @@
 	    void *, u_int);
 struct mbuf  *ether_vlanencap(struct mbuf *, uint16_t);
 
+#ifdef SYS_EVENTHANDLER_H
+/* new ethernet interface attached event */
+typedef void (*ether_ifattach_event_handler_t)(void *, struct ifnet *);
+EVENTHANDLER_DECLARE(ether_ifattach_event, ether_ifattach_event_handler_t);
+#endif
+
 #else /* _KERNEL */
 
 #include <sys/cdefs.h>

Modified: trunk/sys/net/fddi.h
===================================================================
--- trunk/sys/net/fddi.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/fddi.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_fddi.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/fddi.h 194581 2009-06-21 10:29:31Z rdivacky $
+ * $FreeBSD: stable/10/sys/net/fddi.h 194581 2009-06-21 10:29:31Z rdivacky $
  */
 
 #ifndef _NETINET_IF_FDDI_H_

Modified: trunk/sys/net/firewire.h
===================================================================
--- trunk/sys/net/firewire.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/firewire.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	$FreeBSD: stable/9/sys/net/firewire.h 194581 2009-06-21 10:29:31Z rdivacky $
+ *	$FreeBSD: stable/10/sys/net/firewire.h 194581 2009-06-21 10:29:31Z rdivacky $
  */
 
 #ifndef _NET_FIREWIRE_H_

Modified: trunk/sys/net/flowtable.c
===================================================================
--- trunk/sys/net/flowtable.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/flowtable.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,33 +1,32 @@
 /* $MidnightBSD$ */
-/**************************************************************************
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius at FreeBSD.org>
+ * Copyright (c) 2008-2010, BitGravity Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Neither the name of the BitGravity Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
-Copyright (c) 2008-2010, BitGravity Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the BitGravity Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
 #include "opt_route.h"
 #include "opt_mpath.h"
 #include "opt_ddb.h"
@@ -35,19 +34,22 @@
 #include "opt_inet6.h"
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/flowtable.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/flowtable.c 281955 2015-04-24 23:26:44Z hiren $");
 
-#include <sys/param.h>  
+#include <sys/param.h>
 #include <sys/types.h>
 #include <sys/bitstring.h>
 #include <sys/condvar.h>
 #include <sys/callout.h>
-#include <sys/kernel.h>  
+#include <sys/hash.h>
+#include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/pcpu.h>
 #include <sys/proc.h>
+#include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
@@ -54,11 +56,12 @@
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
+#include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_llatbl.h>
 #include <net/if_var.h>
-#include <net/route.h> 
+#include <net/route.h>
 #include <net/flowtable.h>
 #include <net/vnet.h>
 
@@ -70,157 +73,79 @@
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
+#ifdef FLOWTABLE_HASH_ALL
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <netinet/sctp.h>
+#endif
 
-#include <libkern/jenkins.h>
 #include <ddb/ddb.h>
 
-struct ipv4_tuple {
-	uint16_t 	ip_sport;	/* source port */
-	uint16_t 	ip_dport;	/* destination port */
-	in_addr_t 	ip_saddr;	/* source address */
-	in_addr_t 	ip_daddr;	/* destination address */
-};
+#ifdef	FLOWTABLE_HASH_ALL
+#define	KEY_PORTS	(sizeof(uint16_t) * 2)
+#define	KEY_ADDRS	2
+#else
+#define	KEY_PORTS	0
+#define	KEY_ADDRS	1
+#endif
 
-union ipv4_flow {
-	struct ipv4_tuple ipf_ipt;
-	uint32_t 	ipf_key[3];
-};
+#ifdef	INET6
+#define	KEY_ADDR_LEN	sizeof(struct in6_addr)
+#else
+#define	KEY_ADDR_LEN	sizeof(struct in_addr)
+#endif
 
-struct ipv6_tuple {
-	uint16_t 	ip_sport;	/* source port */
-	uint16_t 	ip_dport;	/* destination port */
-	struct in6_addr	ip_saddr;	/* source address */
-	struct in6_addr	ip_daddr;	/* destination address */
-};
+#define	KEYLEN	((KEY_ADDR_LEN * KEY_ADDRS + KEY_PORTS) / sizeof(uint32_t))
 
-union ipv6_flow {
-	struct ipv6_tuple ipf_ipt;
-	uint32_t 	ipf_key[9];
-};
-
 struct flentry {
-	volatile uint32_t	f_fhash;	/* hash flowing forward */
-	uint16_t		f_flags;	/* flow flags */
-	uint8_t			f_pad;		
+	uint32_t		f_hash;		/* hash flowing forward */
+	uint32_t		f_key[KEYLEN];	/* address(es and ports) */
+	uint32_t		f_uptime;	/* uptime at last access */
+	uint16_t		f_fibnum;	/* fib index */
+#ifdef FLOWTABLE_HASH_ALL
 	uint8_t			f_proto;	/* protocol */
-	uint32_t		f_fibnum;	/* fib index */
-	uint32_t		f_uptime;	/* uptime at last access */
-	struct flentry		*f_next;	/* pointer to collision entry */
-	volatile struct rtentry *f_rt;		/* rtentry for flow */
-	volatile struct llentry *f_lle;		/* llentry for flow */
+	uint8_t			f_flags;	/* stale? */
+#define FL_STALE 		1
+#endif
+	SLIST_ENTRY(flentry)	f_next;		/* pointer to collision entry */
+	struct rtentry		*f_rt;		/* rtentry for flow */
+	struct llentry		*f_lle;		/* llentry for flow */
 };
+#undef KEYLEN
 
-struct flentry_v4 {
-	struct flentry	fl_entry;
-	union ipv4_flow	fl_flow;
-};
+SLIST_HEAD(flist, flentry);
+/* Make sure we can use pcpu_zone_ptr for struct flist. */
+CTASSERT(sizeof(struct flist) == sizeof(void *));
 
-struct flentry_v6 {
-	struct flentry	fl_entry;
-	union ipv6_flow	fl_flow;
-};
-
-#define	fl_fhash	fl_entry.fl_fhash
-#define	fl_flags	fl_entry.fl_flags
-#define	fl_proto	fl_entry.fl_proto
-#define	fl_uptime	fl_entry.fl_uptime
-#define	fl_rt		fl_entry.fl_rt
-#define	fl_lle		fl_entry.fl_lle
-
-#define	SECS_PER_HOUR		3600
-#define	SECS_PER_DAY		(24*SECS_PER_HOUR)
-
-#define	SYN_IDLE		300
-#define	UDP_IDLE		300
-#define	FIN_WAIT_IDLE		600
-#define	TCP_IDLE		SECS_PER_DAY
-
-
-typedef	void fl_lock_t(struct flowtable *, uint32_t);
-typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
-
-union flentryp {
-	struct flentry		**global;
-	struct flentry		**pcpu[MAXCPU];
-};
-
-struct flowtable_stats {
-	uint64_t	ft_collisions;
-	uint64_t	ft_allocated;
-	uint64_t	ft_misses;
-	uint64_t	ft_max_depth;
-	uint64_t	ft_free_checks;
-	uint64_t	ft_frees;
-	uint64_t	ft_hits;
-	uint64_t	ft_lookups;
-} __aligned(CACHE_LINE_SIZE);
-
 struct flowtable {
-	struct	flowtable_stats ft_stats[MAXCPU];
+	counter_u64_t	*ft_stat;
 	int 		ft_size;
-	int 		ft_lock_count;
-	uint32_t	ft_flags;
-	char		*ft_name;
-	fl_lock_t	*ft_lock;
-	fl_lock_t 	*ft_unlock;
-	fl_rtalloc_t	*ft_rtalloc;
 	/*
-	 * XXX need to pad out 
-	 */ 
-	struct mtx	*ft_locks;
-	union flentryp	ft_table;
-	bitstr_t 	*ft_masks[MAXCPU];
+	 * ft_table is a malloc(9)ed array of pointers.  Pointers point to
+	 * memory from UMA_ZONE_PCPU zone.
+	 * ft_masks is per-cpu pointer itself.  Each instance points
+	 * to a malloc(9)ed bitset, that is private to corresponding CPU.
+	 */
+	struct flist	**ft_table;
+	bitstr_t 	**ft_masks;
 	bitstr_t	*ft_tmpmask;
-	struct flowtable *ft_next;
+};
 
-	uint32_t	ft_count __aligned(CACHE_LINE_SIZE);
-	uint32_t	ft_udp_idle __aligned(CACHE_LINE_SIZE);
-	uint32_t	ft_fin_wait_idle;
-	uint32_t	ft_syn_idle;
-	uint32_t	ft_tcp_idle;
-	boolean_t	ft_full;
-} __aligned(CACHE_LINE_SIZE);
+#define	FLOWSTAT_ADD(ft, name, v)	\
+	counter_u64_add((ft)->ft_stat[offsetof(struct flowtable_stat, name) / sizeof(uint64_t)], (v))
+#define	FLOWSTAT_INC(ft, name)	FLOWSTAT_ADD(ft, name, 1)
 
 static struct proc *flowcleanerproc;
-static VNET_DEFINE(struct flowtable *, flow_list_head);
-static VNET_DEFINE(uint32_t, flow_hashjitter);
-static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
-static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
+static uint32_t flow_hashjitter;
 
-#define	V_flow_list_head	VNET(flow_list_head)
-#define	V_flow_hashjitter	VNET(flow_hashjitter)
-#define	V_flow_ipv4_zone	VNET(flow_ipv4_zone)
-#define	V_flow_ipv6_zone	VNET(flow_ipv6_zone)
-
-
 static struct cv 	flowclean_f_cv;
 static struct cv 	flowclean_c_cv;
 static struct mtx	flowclean_lock;
 static uint32_t		flowclean_cycles;
-static uint32_t		flowclean_freq;
 
-#ifdef FLOWTABLE_DEBUG
-#define FLDPRINTF(ft, flags, fmt, ...) 		\
-do {		  				\
-	if ((ft)->ft_flags & (flags))		\
-		printf((fmt), __VA_ARGS__);	\
-} while (0);					\
-
-#else
-#define FLDPRINTF(ft, flags, fmt, ...)
-
-#endif
-
-
 /*
  * TODO:
- * - Make flowtable stats per-cpu, aggregated at sysctl call time,
- *   to avoid extra cache evictions caused by incrementing a shared
- *   counter
- * - add sysctls to resize && flush flow tables 
+ * - add sysctls to resize && flush flow tables
  * - Add per flowtable sysctls for statistics and configuring timeouts
  * - add saturation counter to rtentry to support per-packet load-balancing
  *   add flag to indicate round-robin flow, add list lookup from head
@@ -231,396 +156,117 @@
  * - support explicit connection state (currently only ad-hoc for DSR)
  * - idetach() cleanup for options VIMAGE builds.
  */
-VNET_DEFINE(int, flowtable_enable) = 1;
-static VNET_DEFINE(int, flowtable_debug);
-static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
-static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
-static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
-static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
-static VNET_DEFINE(int, flowtable_nmbflows);
-static VNET_DEFINE(int, flowtable_ready) = 0;
+#ifdef INET
+static VNET_DEFINE(struct flowtable, ip4_ft);
+#define	V_ip4_ft	VNET(ip4_ft)
+#endif
+#ifdef INET6
+static VNET_DEFINE(struct flowtable, ip6_ft);
+#define	V_ip6_ft	VNET(ip6_ft)
+#endif
 
+static uma_zone_t flow_zone;
+
+static VNET_DEFINE(int, flowtable_enable) = 1;
 #define	V_flowtable_enable		VNET(flowtable_enable)
-#define	V_flowtable_debug		VNET(flowtable_debug)
-#define	V_flowtable_syn_expire		VNET(flowtable_syn_expire)
-#define	V_flowtable_udp_expire		VNET(flowtable_udp_expire)
-#define	V_flowtable_fin_wait_expire	VNET(flowtable_fin_wait_expire)
-#define	V_flowtable_tcp_expire		VNET(flowtable_tcp_expire)
-#define	V_flowtable_nmbflows		VNET(flowtable_nmbflows)
-#define	V_flowtable_ready		VNET(flowtable_ready)
 
-static SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
+static SYSCTL_NODE(_net, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
     "flowtable");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
-    &VNET_NAME(flowtable_debug), 0, "print debug info.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, enable, CTLFLAG_RW,
     &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
+SYSCTL_UMA_MAX(_net_flowtable, OID_AUTO, maxflows, CTLFLAG_RW,
+    &flow_zone, "Maximum number of flows allowed");
 
-/*
- * XXX This does not end up updating timeouts at runtime
- * and only reflects the value for the last table added :-/
- */
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_syn_expire), 0,
-    "seconds after which to remove syn allocated flow.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_udp_expire), 0,
-    "seconds after which to remove flow allocated to UDP.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_fin_wait_expire), 0,
-    "seconds after which to remove a flow in FIN_WAIT.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_tcp_expire), 0,
-    "seconds after which to remove flow allocated to a TCP connection.");
+static MALLOC_DEFINE(M_FTABLE, "flowtable", "flowtable hashes and bitstrings");
 
+static struct flentry *
+flowtable_lookup_common(struct flowtable *, uint32_t *, int, uint32_t);
 
-/*
- * Maximum number of flows that can be allocated of a given type.
- *
- * The table is allocated at boot time (for the pure caching case
- * there is no reason why this could not be changed at runtime)
- * and thus (currently) needs to be set with a tunable.
- */
-static int
-sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
-{
-	int error, newnmbflows;
-
-	newnmbflows = V_flowtable_nmbflows;
-	error = sysctl_handle_int(oidp, &newnmbflows, 0, req); 
-	if (error == 0 && req->newptr) {
-		if (newnmbflows > V_flowtable_nmbflows) {
-			V_flowtable_nmbflows = newnmbflows;
-			uma_zone_set_max(V_flow_ipv4_zone,
-			    V_flowtable_nmbflows);
-			uma_zone_set_max(V_flow_ipv6_zone,
-			    V_flowtable_nmbflows);
-		} else
-			error = EINVAL;
-	}
-	return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
-    CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
-    "Maximum number of flows allowed");
-
-
-
-#define FS_PRINT(sb, field)	sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
-
-static void
-fs_print(struct sbuf *sb, struct flowtable_stats *fs)
-{
-
-	FS_PRINT(sb, collisions);
-	FS_PRINT(sb, allocated);
-	FS_PRINT(sb, misses);
-	FS_PRINT(sb, max_depth);
-	FS_PRINT(sb, free_checks);
-	FS_PRINT(sb, frees);
-	FS_PRINT(sb, hits);
-	FS_PRINT(sb, lookups);
-}
-
-static void
-flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
-{
-	int i;
-	struct flowtable_stats fs, *pfs;
-
-	if (ft->ft_flags & FL_PCPU) {
-		bzero(&fs, sizeof(fs));
-		pfs = &fs;
-		CPU_FOREACH(i) {
-			pfs->ft_collisions  += ft->ft_stats[i].ft_collisions;
-			pfs->ft_allocated   += ft->ft_stats[i].ft_allocated;
-			pfs->ft_misses      += ft->ft_stats[i].ft_misses;
-			pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
-			pfs->ft_frees       += ft->ft_stats[i].ft_frees;
-			pfs->ft_hits        += ft->ft_stats[i].ft_hits;
-			pfs->ft_lookups     += ft->ft_stats[i].ft_lookups;
-			if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
-				pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
-		}
-	} else {
-		pfs = &ft->ft_stats[0];
-	}
-	fs_print(sb, pfs);
-}
-
-static int
-sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
-{
-	struct flowtable *ft;
-	struct sbuf *sb;
-	int error;
-
-	sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
-
-	ft = V_flow_list_head;
-	while (ft != NULL) {
-		sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
-		flowtable_show_stats(sb, ft);
-		ft = ft->ft_next;
-	}
-	sbuf_finish(sb);
-	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
-	sbuf_delete(sb);
-
-	return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
-    NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
-
-
-#ifndef RADIX_MPATH
-static void
-rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
-{
-
-	rtalloc_ign_fib(ro, 0, fibnum);
-}
-#endif
-
-static void
-flowtable_global_lock(struct flowtable *table, uint32_t hash)
-{	
-	int lock_index = (hash)&(table->ft_lock_count - 1);
-
-	mtx_lock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_global_unlock(struct flowtable *table, uint32_t hash)
-{	
-	int lock_index = (hash)&(table->ft_lock_count - 1);
-
-	mtx_unlock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
-{
-
-	critical_enter();
-}
-
-static void
-flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
-{
-
-	critical_exit();
-}
-
-#define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
-#define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
-#define FL_ENTRY_LOCK(table, hash)  (table)->ft_lock((table), (hash))
-#define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
-
-#define FL_STALE 	(1<<8)
-#define FL_OVERWRITE	(1<<10)
-
-void
-flow_invalidate(struct flentry *fle)
-{
-
-	fle->f_flags |= FL_STALE;
-}
-
-static __inline int
-proto_to_flags(uint8_t proto)
-{
-	int flag;
-
-	switch (proto) {
-	case IPPROTO_TCP:
-		flag = FL_TCP;
-		break;
-	case IPPROTO_SCTP:
-		flag = FL_SCTP;
-		break;		
-	case IPPROTO_UDP:
-		flag = FL_UDP;
-		break;
-	default:
-		flag = 0;
-		break;
-	}
-
-	return (flag);
-}
-
-static __inline int
-flags_to_proto(int flags)
-{
-	int proto, protoflags;
-
-	protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
-	switch (protoflags) {
-	case FL_TCP:
-		proto = IPPROTO_TCP;
-		break;
-	case FL_SCTP:
-		proto = IPPROTO_SCTP;
-		break;
-	case FL_UDP:
-		proto = IPPROTO_UDP;
-		break;
-	default:
-		proto = 0;
-		break;
-	}
-	return (proto);
-}
-
 #ifdef INET
-#ifdef FLOWTABLE_DEBUG
-static void
-ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
-    struct sockaddr_in *dsin)
+static struct flentry *
+flowtable_lookup_ipv4(struct mbuf *m, struct route *ro)
 {
-	char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
-
-	if (flags & FL_HASH_ALL) {
-		inet_ntoa_r(ssin->sin_addr, saddr);
-		inet_ntoa_r(dsin->sin_addr, daddr);
-		printf("proto=%d %s:%d->%s:%d\n",
-		    proto, saddr, ntohs(ssin->sin_port), daddr,
-		    ntohs(dsin->sin_port));
-	} else {
-		inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
-		printf("proto=%d %s\n", proto, daddr);
-	}
-
-}
-#endif
-
-static int
-ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
-    struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
-{
+	struct flentry *fle;
+	struct sockaddr_in *sin;
 	struct ip *ip;
-	uint8_t proto;
+	uint32_t fibnum;
+#ifdef FLOWTABLE_HASH_ALL
+	uint32_t key[3];
 	int iphlen;
-	struct tcphdr *th;
-	struct udphdr *uh;
-	struct sctphdr *sh;
 	uint16_t sport, dport;
+	uint8_t proto;
+#endif
 
-	proto = sport = dport = 0;
 	ip = mtod(m, struct ip *);
-	dsin->sin_family = AF_INET;
-	dsin->sin_len = sizeof(*dsin);
-	dsin->sin_addr = ip->ip_dst;
-	ssin->sin_family = AF_INET;
-	ssin->sin_len = sizeof(*ssin);
-	ssin->sin_addr = ip->ip_src;	
 
+	if (ip->ip_src.s_addr == ip->ip_dst.s_addr ||
+	    (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
+		return (NULL);
+
+	fibnum = M_GETFIB(m);
+
+#ifdef FLOWTABLE_HASH_ALL
+	iphlen = ip->ip_hl << 2;
 	proto = ip->ip_p;
-	if ((*flags & FL_HASH_ALL) == 0) {
-		FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
-		    *flags);
-		goto skipports;
-	}
 
-	iphlen = ip->ip_hl << 2; /* XXX options? */
+	switch (proto) {
+	case IPPROTO_TCP: {
+		struct tcphdr *th;
 
-	switch (proto) {
-	case IPPROTO_TCP:
-		th = (struct tcphdr *)((caddr_t)ip + iphlen);
+		th = (struct tcphdr *)((char *)ip + iphlen);
 		sport = th->th_sport;
 		dport = th->th_dport;
-		if ((*flags & FL_HASH_ALL) &&
-		    (th->th_flags & (TH_RST|TH_FIN)))
-			*flags |= FL_STALE;
-	break;
-	case IPPROTO_UDP:
-		uh = (struct udphdr *)((caddr_t)ip + iphlen);
+		if (th->th_flags & (TH_RST|TH_FIN))
+			fibnum |= (FL_STALE << 24);
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr *uh;
+
+		uh = (struct udphdr *)((char *)ip + iphlen);
 		sport = uh->uh_sport;
 		dport = uh->uh_dport;
-	break;
-	case IPPROTO_SCTP:
-		sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr *sh;
+
+		sh = (struct sctphdr *)((char *)ip + iphlen);
 		sport = sh->src_port;
 		dport = sh->dest_port;
-	break;
+		/* XXXGL: handle stale? */
+		break;
+	}
 	default:
-		FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
-		return (ENOTSUP);
-		/* no port - hence not a protocol we care about */
+		sport = dport = 0;
 		break;
-	
 	}
 
-skipports:
-	*flags |= proto_to_flags(proto);
-	ssin->sin_port = sport;
-	dsin->sin_port = dport;
-	return (0);
-}
+	key[0] = ip->ip_dst.s_addr;
+	key[1] = ip->ip_src.s_addr;
+	key[2] = (dport << 16) | sport;
+	fibnum |= proto << 16;
 
-static uint32_t
-ipv4_flow_lookup_hash_internal(
-	struct sockaddr_in *ssin, struct sockaddr_in *dsin, 
-	    uint32_t *key, uint16_t flags)
-{
-	uint16_t sport, dport;
-	uint8_t proto;
-	int offset = 0;
+	fle = flowtable_lookup_common(&V_ip4_ft, key, 3 * sizeof(uint32_t),
+	    fibnum);
 
-	if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
-		return (0);
-	proto = flags_to_proto(flags);
-	sport = dport = key[2] = key[1] = key[0] = 0;
-	if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
-		key[1] = ssin->sin_addr.s_addr;
-		sport = ssin->sin_port;
-	}
-	if (dsin != NULL) {
-		key[2] = dsin->sin_addr.s_addr;
-		dport = dsin->sin_port;
-	}
-	if (flags & FL_HASH_ALL) {
-		((uint16_t *)key)[0] = sport;
-		((uint16_t *)key)[1] = dport; 
-	} else
-		offset = V_flow_hashjitter + proto;
+#else	/* !FLOWTABLE_HASH_ALL */
 
-	return (jenkins_hashword(key, 3, offset));
-}
+	fle = flowtable_lookup_common(&V_ip4_ft, (uint32_t *)&ip->ip_dst,
+	    sizeof(struct in_addr), fibnum);
 
-static struct flentry *
-flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
-{
-	struct sockaddr_storage ssa, dsa;
-	uint16_t flags;
-	struct sockaddr_in *dsin, *ssin;
+#endif	/* FLOWTABLE_HASH_ALL */
 
-	dsin = (struct sockaddr_in *)&dsa;
-	ssin = (struct sockaddr_in *)&ssa;
-	bzero(dsin, sizeof(*dsin));
-	bzero(ssin, sizeof(*ssin));
-	flags = ft->ft_flags;
-	if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
+	if (fle == NULL)
 		return (NULL);
 
-	return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
-}
-
-void
-flow_to_route(struct flentry *fle, struct route *ro)
-{
-	uint32_t *hashkey = NULL;
-	struct sockaddr_in *sin;
-
 	sin = (struct sockaddr_in *)&ro->ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
-	hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
-	sin->sin_addr.s_addr = hashkey[2];
-	ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
-	ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
-	ro->ro_flags |= RT_NORTREF;
+	sin->sin_addr = ip->ip_dst;
+
+	return (fle);
 }
 #endif /* INET */
 
@@ -634,9 +280,8 @@
 #define PULLUP_TO(_len, p, T)						\
 do {									\
 	int x = (_len) + sizeof(T);					\
-	if ((m)->m_len < x) {						\
-		goto receive_failed;					\
-	}								\
+	if ((m)->m_len < x)						\
+		return (NULL);						\
 	p = (mtod(m, char *) + (_len));					\
 } while (0)
 
@@ -644,26 +289,35 @@
 #define	SCTP(p)		((struct sctphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 
-static int
-ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
-    struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
+static struct flentry *
+flowtable_lookup_ipv6(struct mbuf *m, struct route *ro)
 {
+	struct flentry *fle;
+	struct sockaddr_in6 *sin6;
 	struct ip6_hdr *ip6;
-	uint8_t proto;
+	uint32_t fibnum;
+#ifdef FLOWTABLE_HASH_ALL
+	uint32_t key[9];
+	void *ulp;
 	int hlen;
-	uint16_t src_port, dst_port;
+	uint16_t sport, dport;
 	u_short offset;
-	void *ulp;
+	uint8_t proto;
+#else
+	uint32_t key[4];
+#endif
 
-	offset = hlen = src_port = dst_port = 0;
-	ulp = NULL;
 	ip6 = mtod(m, struct ip6_hdr *);
+	if (in6_localaddr(&ip6->ip6_dst))
+		return (NULL);
+
+	fibnum = M_GETFIB(m);
+
+#ifdef	FLOWTABLE_HASH_ALL
 	hlen = sizeof(struct ip6_hdr);
 	proto = ip6->ip6_nxt;
-
-	if ((*flags & FL_HASH_ALL) == 0)
-		goto skipports;
-
+	offset = sport = dport = 0;
+	ulp = NULL;
 	while (ulp == NULL) {
 		switch (proto) {
 		case IPPROTO_ICMPV6:
@@ -676,21 +330,21 @@
 			break;
 		case IPPROTO_TCP:
 			PULLUP_TO(hlen, ulp, struct tcphdr);
-			dst_port = TCP(ulp)->th_dport;
-			src_port = TCP(ulp)->th_sport;
-			if ((*flags & FL_HASH_ALL) &&
-			    (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
-				*flags |= FL_STALE;
+			dport = TCP(ulp)->th_dport;
+			sport = TCP(ulp)->th_sport;
+			if (TCP(ulp)->th_flags & (TH_RST|TH_FIN))
+				fibnum |= (FL_STALE << 24);
 			break;
 		case IPPROTO_SCTP:
 			PULLUP_TO(hlen, ulp, struct sctphdr);
-			src_port = SCTP(ulp)->src_port;
-			dst_port = SCTP(ulp)->dest_port;
+			dport = SCTP(ulp)->src_port;
+			sport = SCTP(ulp)->dest_port;
+			/* XXXGL: handle stale? */
 			break;
 		case IPPROTO_UDP:
 			PULLUP_TO(hlen, ulp, struct udphdr);
-			dst_port = UDP(ulp)->uh_dport;
-			src_port = UDP(ulp)->uh_sport;
+			dport = UDP(ulp)->uh_dport;
+			sport = UDP(ulp)->uh_sport;
 			break;
 		case IPPROTO_HOPOPTS:	/* RFC 2460 */
 			PULLUP_TO(hlen, ulp, struct ip6_hbh);
@@ -699,7 +353,7 @@
 			ulp = NULL;
 			break;
 		case IPPROTO_ROUTING:	/* RFC 2460 */
-			PULLUP_TO(hlen, ulp, struct ip6_rthdr);	
+			PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 			hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 			proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 			ulp = NULL;
@@ -730,105 +384,28 @@
 		}
 	}
 
-	if (src_port == 0) {
-	receive_failed:
-		return (ENOTSUP);
-	}
+	bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
+	bcopy(&ip6->ip6_src, &key[4], sizeof(struct in6_addr));
+	key[8] = (dport << 16) | sport;
+	fibnum |= proto << 16;
 
-skipports:
-	dsin6->sin6_family = AF_INET6;
-	dsin6->sin6_len = sizeof(*dsin6);
-	dsin6->sin6_port = dst_port;
-	memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
+	fle = flowtable_lookup_common(&V_ip6_ft, key, 9 * sizeof(uint32_t),
+	    fibnum);
+#else	/* !FLOWTABLE_HASH_ALL */
+	bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
+	fle = flowtable_lookup_common(&V_ip6_ft, key, sizeof(struct in6_addr),
+	    fibnum);
+#endif	/* FLOWTABLE_HASH_ALL */
 
-	ssin6->sin6_family = AF_INET6;
-	ssin6->sin6_len = sizeof(*ssin6);
-	ssin6->sin6_port = src_port;
-	memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
-	*flags |= proto_to_flags(proto);
-
-	return (0);
-}
-
-#define zero_key(key) 		\
-do {				\
-	key[0] = 0;		\
-	key[1] = 0;		\
-	key[2] = 0;		\
-	key[3] = 0;		\
-	key[4] = 0;		\
-	key[5] = 0;		\
-	key[6] = 0;		\
-	key[7] = 0;		\
-	key[8] = 0;		\
-} while (0)
-	
-static uint32_t
-ipv6_flow_lookup_hash_internal(
-	struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, 
-	    uint32_t *key, uint16_t flags)
-{
-	uint16_t sport, dport;
-	uint8_t proto;
-	int offset = 0;
-
-	if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
-		return (0);
-
-	proto = flags_to_proto(flags);
-	zero_key(key);
-	sport = dport = 0;
-	if (dsin6 != NULL) {
-		memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
-		dport = dsin6->sin6_port;
-	}
-	if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
-		memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
-		sport = ssin6->sin6_port;
-	}
-	if (flags & FL_HASH_ALL) {
-		((uint16_t *)key)[0] = sport;
-		((uint16_t *)key)[1] = dport; 
-	} else
-		offset = V_flow_hashjitter + proto;
-
-	return (jenkins_hashword(key, 9, offset));
-}
-
-static struct flentry *
-flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
-{
-	struct sockaddr_storage ssa, dsa;
-	struct sockaddr_in6 *dsin6, *ssin6;	
-	uint16_t flags;
-
-	dsin6 = (struct sockaddr_in6 *)&dsa;
-	ssin6 = (struct sockaddr_in6 *)&ssa;
-	bzero(dsin6, sizeof(*dsin6));
-	bzero(ssin6, sizeof(*ssin6));
-	flags = ft->ft_flags;
-	
-	if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
+	if (fle == NULL)
 		return (NULL);
 
-	return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
-}
-
-void
-flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
-{
-	uint32_t *hashkey = NULL;
-	struct sockaddr_in6 *sin6;
-
 	sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
-
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_len = sizeof(*sin6);
-	hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
-	memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
-	ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
-	ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
-	ro->ro_flags |= RT_NORTREF;
+	bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(struct in6_addr));
+
+	return (fle);
 }
 #endif /* INET6 */
 
@@ -835,583 +412,368 @@
 static bitstr_t *
 flowtable_mask(struct flowtable *ft)
 {
-	bitstr_t *mask;
 
-	if (ft->ft_flags & FL_PCPU)
-		mask = ft->ft_masks[curcpu];
-	else
-		mask = ft->ft_masks[0];
+	/*
+	 * flowtable_free_stale() calls w/o critical section, but
+	 * with sched_bind(). Since pointer is stable throughout
+	 * ft lifetime, it is safe, otherwise...
+	 *
+	 * CRITICAL_ASSERT(curthread);
+	 */
 
-	return (mask);
+	return (*(bitstr_t **)zpcpu_get(ft->ft_masks));
 }
 
-static struct flentry **
-flowtable_entry(struct flowtable *ft, uint32_t hash)
+static struct flist *
+flowtable_list(struct flowtable *ft, uint32_t hash)
 {
-	struct flentry **fle;
-	int index = (hash % ft->ft_size);
 
-	if (ft->ft_flags & FL_PCPU) {
-		KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
-		fle = &ft->ft_table.pcpu[curcpu][index];
-	} else {
-		KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
-		fle = &ft->ft_table.global[index];
-	}
-	
-	return (fle);
+	CRITICAL_ASSERT(curthread);
+	return (zpcpu_get(ft->ft_table[hash % ft->ft_size]));
 }
 
 static int
-flow_stale(struct flowtable *ft, struct flentry *fle)
+flow_stale(struct flowtable *ft, struct flentry *fle, int maxidle)
 {
-	time_t idle_time;
 
-	if ((fle->f_fhash == 0)
-	    || ((fle->f_rt->rt_flags & RTF_HOST) &&
-		((fle->f_rt->rt_flags & (RTF_UP))
-		    != (RTF_UP)))
-	    || (fle->f_rt->rt_ifp == NULL)
-	    || !RT_LINK_IS_UP(fle->f_rt->rt_ifp))
+	if (((fle->f_rt->rt_flags & RTF_HOST) &&
+	    ((fle->f_rt->rt_flags & (RTF_UP)) != (RTF_UP))) ||
+	    (fle->f_rt->rt_ifp == NULL) ||
+	    !RT_LINK_IS_UP(fle->f_rt->rt_ifp) ||
+	    (fle->f_lle->la_flags & LLE_VALID) == 0)
 		return (1);
 
-	idle_time = time_uptime - fle->f_uptime;
+	if (time_uptime - fle->f_uptime > maxidle)
+		return (1);
 
-	if ((fle->f_flags & FL_STALE) ||
-	    ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
-		&& (idle_time > ft->ft_udp_idle)) ||
-	    ((fle->f_flags & TH_FIN)
-		&& (idle_time > ft->ft_fin_wait_idle)) ||
-	    ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
-		&& (idle_time > ft->ft_syn_idle)) ||
-	    ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
-		&& (idle_time > ft->ft_tcp_idle)) ||
-	    ((fle->f_rt->rt_flags & RTF_UP) == 0 || 
-		(fle->f_rt->rt_ifp == NULL)))
+#ifdef FLOWTABLE_HASH_ALL
+	if (fle->f_flags & FL_STALE)
 		return (1);
+#endif
 
 	return (0);
 }
 
-static void
-flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
+static int
+flow_full(void)
 {
-	uint32_t *hashkey;
-	int i, nwords;
+	int count, max;
 
-	if (fle->f_flags & FL_IPV6) {
-		nwords = 9;
-		hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
-	} else {
-		nwords = 3;
-		hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
-	}
-	
-	for (i = 0; i < nwords; i++) 
-		hashkey[i] = key[i];
-}
+	count = uma_zone_get_cur(flow_zone);
+	max = uma_zone_get_max(flow_zone);
 
-static struct flentry *
-flow_alloc(struct flowtable *ft)
-{
-	struct flentry *newfle;
-	uma_zone_t zone;
-
-	newfle = NULL;
-	zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
-
-	newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
-	if (newfle != NULL)
-		atomic_add_int(&ft->ft_count, 1);
-	return (newfle);
+	return (count > (max - (max >> 3)));
 }
 
-static void
-flow_free(struct flentry *fle, struct flowtable *ft)
+static int
+flow_matches(struct flentry *fle, uint32_t *key, int keylen, uint32_t fibnum)
 {
-	uma_zone_t zone;
+#ifdef FLOWTABLE_HASH_ALL
+	uint8_t proto;
 
-	zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
-	atomic_add_int(&ft->ft_count, -1);
-	uma_zfree(zone, fle);
-}
+	proto = (fibnum >> 16) & 0xff;
+	fibnum &= 0xffff;
+#endif
 
-static int
-flow_full(struct flowtable *ft)
-{
-	boolean_t full;
-	uint32_t count;
-	
-	full = ft->ft_full;
-	count = ft->ft_count;
+	CRITICAL_ASSERT(curthread);
 
-	if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
-		ft->ft_full = FALSE;
-	else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
-		ft->ft_full = TRUE;
-	
-	if (full && !ft->ft_full) {
-		flowclean_freq = 4*hz;
-		if ((ft->ft_flags & FL_HASH_ALL) == 0)
-			ft->ft_udp_idle = ft->ft_fin_wait_idle =
-			    ft->ft_syn_idle = ft->ft_tcp_idle = 5;
-		cv_broadcast(&flowclean_c_cv);
-	} else if (!full && ft->ft_full) {
-		flowclean_freq = 20*hz;
-		if ((ft->ft_flags & FL_HASH_ALL) == 0)
-			ft->ft_udp_idle = ft->ft_fin_wait_idle =
-			    ft->ft_syn_idle = ft->ft_tcp_idle = 30;
-	}
+	/* Microoptimization for IPv4: don't use bcmp(). */
+	if (((keylen == sizeof(uint32_t) && (fle->f_key[0] != key[0])) ||
+	    (bcmp(fle->f_key, key, keylen) == 0)) &&
+	    fibnum == fle->f_fibnum &&
+#ifdef FLOWTABLE_HASH_ALL
+	    proto == fle->f_proto &&
+#endif
+	    (fle->f_rt->rt_flags & RTF_UP) &&
+	    fle->f_rt->rt_ifp != NULL &&
+	    (fle->f_lle->la_flags & LLE_VALID))
+		return (1);
 
-	return (ft->ft_full);
+	return (0);
 }
 
-static int
+static struct flentry *
 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
-    uint32_t fibnum, struct route *ro, uint16_t flags)
+    int keylen, uint32_t fibnum0)
 {
-	struct flentry *fle, *fletail, *newfle, **flep;
-	struct flowtable_stats *fs = &ft->ft_stats[curcpu];
-	int depth;
+#ifdef INET6
+	struct route_in6 sro6;
+#endif
+#ifdef INET
+	struct route sro;
+#endif
+	struct route *ro = NULL;
+	struct rtentry *rt;
+	struct lltable *lt = NULL;
+	struct llentry *lle;
+	struct sockaddr_storage *l3addr;
+	struct ifnet *ifp;
+	struct flist *flist;
+	struct flentry *fle, *iter;
 	bitstr_t *mask;
+	uint16_t fibnum = fibnum0;
+#ifdef FLOWTABLE_HASH_ALL
 	uint8_t proto;
 
-	newfle = flow_alloc(ft);
-	if (newfle == NULL)
-		return (ENOMEM);
+	proto = (fibnum0 >> 16) & 0xff;
+	fibnum = fibnum0 & 0xffff;
+#endif
 
-	newfle->f_flags |= (flags & FL_IPV6);
-	proto = flags_to_proto(flags);
-
-	FL_ENTRY_LOCK(ft, hash);
-	mask = flowtable_mask(ft);
-	flep = flowtable_entry(ft, hash);
-	fletail = fle = *flep;
-
-	if (fle == NULL) {
-		bit_set(mask, FL_ENTRY_INDEX(ft, hash));
-		*flep = fle = newfle;
-		goto skip;
-	} 
-	
-	depth = 0;
-	fs->ft_collisions++;
 	/*
-	 * find end of list and make sure that we were not
-	 * preempted by another thread handling this flow
+	 * This bit of code ends up locking the
+	 * same route 3 times (just like ip_output + ether_output)
+	 * - at lookup
+	 * - in rt_check when called by arpresolve
+	 * - dropping the refcount for the rtentry
+	 *
+	 * This could be consolidated to one if we wrote a variant
+	 * of arpresolve with an rt_check variant that expected to
+	 * receive the route locked
 	 */
-	while (fle != NULL) {
-		if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
-			/*
-			 * there was either a hash collision
-			 * or we lost a race to insert
-			 */
-			FL_ENTRY_UNLOCK(ft, hash);
-			flow_free(newfle, ft);
-			
-			if (flags & FL_OVERWRITE) 
-				goto skip;
-			return (EEXIST);
-		}
-		/*
-		 * re-visit this double condition XXX
-		 */
-		if (fletail->f_next != NULL)
-			fletail = fle->f_next;
+#ifdef INET
+	if (ft == &V_ip4_ft) {
+		struct sockaddr_in *sin;
 
-		depth++;
-		fle = fle->f_next;
-	} 
+		ro = &sro;
+		bzero(&sro.ro_dst, sizeof(sro.ro_dst));
 
-	if (depth > fs->ft_max_depth)
-		fs->ft_max_depth = depth;
-	fletail->f_next = newfle;
-	fle = newfle;
-skip:
-	flowtable_set_hashkey(fle, key);
-
-	fle->f_proto = proto;
-	fle->f_rt = ro->ro_rt;
-	fle->f_lle = ro->ro_lle;
-	fle->f_fhash = hash;
-	fle->f_fibnum = fibnum;
-	fle->f_uptime = time_uptime;
-	FL_ENTRY_UNLOCK(ft, hash);
-	return (0);
-}
-
-int
-kern_flowtable_insert(struct flowtable *ft,
-    struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
-    struct route *ro, uint32_t fibnum, int flags)
-{
-	uint32_t key[9], hash;
-
-	flags = (ft->ft_flags | flags | FL_OVERWRITE);
-	hash = 0;
-
-#ifdef INET
-	if (ssa->ss_family == AF_INET) 
-		hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
-		    (struct sockaddr_in *)dsa, key, flags);
+		sin = (struct sockaddr_in *)&sro.ro_dst;
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr.s_addr = key[0];
+	}
 #endif
 #ifdef INET6
-	if (ssa->ss_family == AF_INET6) 
-		hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
-		    (struct sockaddr_in6 *)dsa, key, flags);
-#endif	
-	if (ro->ro_rt == NULL || ro->ro_lle == NULL)
-		return (EINVAL);
+	if (ft == &V_ip6_ft) {
+		struct sockaddr_in6 *sin6;
 
-	FLDPRINTF(ft, FL_DEBUG,
-	    "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
-	    key[0], key[1], key[2], hash, fibnum, flags);
-	return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
-}
+		ro = (struct route *)&sro6;
+		sin6 = &sro6.ro_dst;
 
-static int
-flowtable_key_equal(struct flentry *fle, uint32_t *key)
-{
-	uint32_t *hashkey;
-	int i, nwords;
-
-	if (fle->f_flags & FL_IPV6) {
-		nwords = 9;
-		hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
-	} else {
-		nwords = 3;
-		hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
+		bzero(sin6, sizeof(*sin6));
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_len = sizeof(*sin6);
+		bcopy(key, &sin6->sin6_addr, sizeof(struct in6_addr));
 	}
+#endif
 
-	for (i = 0; i < nwords; i++) 
-		if (hashkey[i] != key[i])
-			return (0);
+	ro->ro_rt = NULL;
+#ifdef RADIX_MPATH
+	rtalloc_mpath_fib(ro, hash, fibnum);
+#else
+	rtalloc_ign_fib(ro, 0, fibnum);
+#endif
+	if (ro->ro_rt == NULL)
+		return (NULL);
 
-	return (1);
-}
+	rt = ro->ro_rt;
+	ifp = rt->rt_ifp;
 
-struct flentry *
-flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
-{
-	struct flentry *fle = NULL;
+	if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
+		RTFREE(rt);
+		return (NULL);
+	}
 
 #ifdef INET
-	if (af == AF_INET)
-		fle = flowtable_lookup_mbuf4(ft, m);
+	if (ft == &V_ip4_ft)
+		lt = LLTABLE(ifp);
 #endif
 #ifdef INET6
-	if (af == AF_INET6)
-		fle = flowtable_lookup_mbuf6(ft, m);
-#endif	
-	if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
-		m->m_flags |= M_FLOWID;
-		m->m_pkthdr.flowid = fle->f_fhash;
-	}
-	return (fle);
-}
-	
-struct flentry *
-flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
-    struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
-{
-	uint32_t key[9], hash;
-	struct flentry *fle;
-	struct flowtable_stats *fs = &ft->ft_stats[curcpu];
-	uint8_t proto = 0;
-	int error = 0;
-	struct rtentry *rt;
-	struct llentry *lle;
-	struct route sro, *ro;
-	struct route_in6 sro6;
+	if (ft == &V_ip6_ft)
+		lt = LLTABLE6(ifp);
+#endif
 
-	sro.ro_rt = sro6.ro_rt = NULL;
-	sro.ro_lle = sro6.ro_lle = NULL;
-	ro = NULL;
-	hash = 0;
-	flags |= ft->ft_flags;
-	proto = flags_to_proto(flags);
-#ifdef INET
-	if (ssa->ss_family == AF_INET) {
-		struct sockaddr_in *ssin, *dsin;
+	if (rt->rt_flags & RTF_GATEWAY)
+		l3addr = (struct sockaddr_storage *)rt->rt_gateway;
+	else
+		l3addr = (struct sockaddr_storage *)&ro->ro_dst;
+	lle = llentry_alloc(ifp, lt, l3addr);
 
-		ro = &sro;
-		memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
-		/*
-		 * The harvested source and destination addresses
-		 * may contain port information if the packet is 
-		 * from a transport protocol (e.g. TCP/UDP). The 
-		 * port field must be cleared before performing 
-		 * a route lookup.
-		 */
-		((struct sockaddr_in *)&ro->ro_dst)->sin_port = 0;
-		dsin = (struct sockaddr_in *)dsa;
-		ssin = (struct sockaddr_in *)ssa;
-		if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
-		    (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
-		    (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
-			return (NULL);
+	if (lle == NULL) {
+		RTFREE(rt);
+		return (NULL);
+	}
 
-		hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
+	/* Don't insert the entry if the ARP hasn't yet finished resolving. */
+	if ((lle->la_flags & LLE_VALID) == 0) {
+		RTFREE(rt);
+		LLE_FREE(lle);
+		FLOWSTAT_INC(ft, ft_fail_lle_invalid);
+		return (NULL);
 	}
-#endif
-#ifdef INET6
-	if (ssa->ss_family == AF_INET6) {
-		struct sockaddr_in6 *ssin6, *dsin6;
 
-		ro = (struct route *)&sro6;
-		memcpy(&sro6.ro_dst, dsa,
-		    sizeof(struct sockaddr_in6));
-		((struct sockaddr_in6 *)&ro->ro_dst)->sin6_port = 0;
-		dsin6 = (struct sockaddr_in6 *)dsa;
-		ssin6 = (struct sockaddr_in6 *)ssa;
+	fle = uma_zalloc(flow_zone, M_NOWAIT | M_ZERO);
+	if (fle == NULL) {
+		RTFREE(rt);
+		LLE_FREE(lle);
+		return (NULL);
+	}
 
-		flags |= FL_IPV6;
-		hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
-	}
+	fle->f_hash = hash;
+	bcopy(key, &fle->f_key, keylen);
+	fle->f_rt = rt;
+	fle->f_lle = lle;
+	fle->f_fibnum = fibnum;
+	fle->f_uptime = time_uptime;
+#ifdef FLOWTABLE_HASH_ALL
+	fle->f_proto = proto;
+	fle->f_flags = fibnum0 >> 24;
 #endif
-	/*
-	 * Ports are zero and this isn't a transmit cache
-	 * - thus not a protocol for which we need to keep 
-	 * state
-	 * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
-	 */
-	if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
-		return (NULL);
 
-	fs->ft_lookups++;
-	FL_ENTRY_LOCK(ft, hash);
-	if ((fle = FL_ENTRY(ft, hash)) == NULL) {
-		FL_ENTRY_UNLOCK(ft, hash);
-		goto uncached;
+	critical_enter();
+	mask = flowtable_mask(ft);
+	flist = flowtable_list(ft, hash);
+
+	if (SLIST_EMPTY(flist)) {
+		bit_set(mask, (hash % ft->ft_size));
+		SLIST_INSERT_HEAD(flist, fle, f_next);
+		goto skip;
 	}
-keycheck:	
-	rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
-	lle = __DEVOLATILE(struct llentry *, fle->f_lle);
-	if ((rt != NULL)
-	    && lle != NULL
-	    && fle->f_fhash == hash
-	    && flowtable_key_equal(fle, key)
-	    && (proto == fle->f_proto)
-	    && (fibnum == fle->f_fibnum)
-	    && (rt->rt_flags & RTF_UP)
-	    && (rt->rt_ifp != NULL)
-	    && (lle->la_flags & LLE_VALID)) {
-		fs->ft_hits++;
-		fle->f_uptime = time_uptime;
-		fle->f_flags |= flags;
-		FL_ENTRY_UNLOCK(ft, hash);
-		return (fle);
-	} else if (fle->f_next != NULL) {
-		fle = fle->f_next;
-		goto keycheck;
-	}
-	FL_ENTRY_UNLOCK(ft, hash);
-uncached:
-	if (flags & FL_NOAUTO || flow_full(ft))
-		return (NULL);
 
-	fs->ft_misses++;
 	/*
-	 * This bit of code ends up locking the
-	 * same route 3 times (just like ip_output + ether_output)
-	 * - at lookup
-	 * - in rt_check when called by arpresolve
-	 * - dropping the refcount for the rtentry
-	 *
-	 * This could be consolidated to one if we wrote a variant
-	 * of arpresolve with an rt_check variant that expected to
-	 * receive the route locked
+	 * find end of list and make sure that we were not
+	 * preempted by another thread handling this flow
 	 */
-
-#ifdef INVARIANTS
-	if ((ro->ro_dst.sa_family != AF_INET) &&
-	    (ro->ro_dst.sa_family != AF_INET6))
-		panic("sa_family == %d\n", ro->ro_dst.sa_family);
+	SLIST_FOREACH(iter, flist, f_next) {
+		KASSERT(iter->f_hash % ft->ft_size == hash % ft->ft_size,
+		    ("%s: wrong hash", __func__));
+		if (flow_matches(iter, key, keylen, fibnum)) {
+			/*
+			 * We probably migrated to an other CPU after
+			 * lookup in flowtable_lookup_common() failed.
+			 * It appeared that this CPU already has flow
+			 * entry.
+			 */
+			iter->f_uptime = time_uptime;
+#ifdef FLOWTABLE_HASH_ALL
+			iter->f_flags |= fibnum >> 24;
 #endif
+			critical_exit();
+			FLOWSTAT_INC(ft, ft_collisions);
+			uma_zfree(flow_zone, fle);
+			return (iter);
+		}
+	}
 
-	ft->ft_rtalloc(ro, hash, fibnum);
-	if (ro->ro_rt == NULL) 
-		error = ENETUNREACH;
-	else {
-		struct llentry *lle = NULL;
-		struct sockaddr_storage *l3addr;
-		struct rtentry *rt = ro->ro_rt;
-		struct ifnet *ifp = rt->rt_ifp;
+	SLIST_INSERT_HEAD(flist, fle, f_next);
+skip:
+	critical_exit();
+	FLOWSTAT_INC(ft, ft_inserts);
 
-		if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
-			RTFREE(rt);
-			ro->ro_rt = NULL;
-			return (NULL);
-		}
-#ifdef INET6
-		if (ssa->ss_family == AF_INET6) {
-			struct sockaddr_in6 *dsin6;
+	return (fle);
+}
 
-			dsin6 = (struct sockaddr_in6 *)dsa;			
-			if (in6_localaddr(&dsin6->sin6_addr)) {
-				RTFREE(rt);
-				ro->ro_rt = NULL;
-				return (NULL);				
-			}
+int
+flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
+{
+	struct flentry *fle;
 
-			if (rt->rt_flags & RTF_GATEWAY)
-				l3addr = (struct sockaddr_storage *)rt->rt_gateway;
-			
-			else
-				l3addr = (struct sockaddr_storage *)&ro->ro_dst;
-			lle = llentry_alloc(ifp, LLTABLE6(ifp), l3addr);
-		}
-#endif	
+	if (V_flowtable_enable == 0)
+		return (ENXIO);
+
+	switch (sa) {
 #ifdef INET
-		if (ssa->ss_family == AF_INET) {
-			if (rt->rt_flags & RTF_GATEWAY)
-				l3addr = (struct sockaddr_storage *)rt->rt_gateway;
-			else
-				l3addr = (struct sockaddr_storage *)&ro->ro_dst;
-			lle = llentry_alloc(ifp, LLTABLE(ifp), l3addr);	
-		}
-			
+	case AF_INET:
+		fle = flowtable_lookup_ipv4(m, ro);
+		break;
 #endif
-		ro->ro_lle = lle;
+#ifdef INET6
+	case AF_INET6:
+		fle = flowtable_lookup_ipv6(m, ro);
+		break;
+#endif
+	default:
+		panic("%s: sa %d", __func__, sa);
+	}
 
-		if (lle == NULL) {
-			RTFREE(rt);
-			ro->ro_rt = NULL;
-			return (NULL);
-		}
-		error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
+	if (fle == NULL)
+		return (EHOSTUNREACH);
 
-		if (error) {
-			RTFREE(rt);
-			LLE_FREE(lle);
-			ro->ro_rt = NULL;
-			ro->ro_lle = NULL;
-		}
-	} 
+	if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) {
+		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+		m->m_pkthdr.flowid = fle->f_hash;
+	}
 
-	return ((error) ? NULL : fle);
+	ro->ro_rt = fle->f_rt;
+	ro->ro_lle = fle->f_lle;
+	ro->ro_flags |= RT_NORTREF;
+
+	return (0);
 }
 
-/*
- * used by the bit_alloc macro
- */
-#define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
-	
-struct flowtable *
-flowtable_alloc(char *name, int nentry, int flags)
+static struct flentry *
+flowtable_lookup_common(struct flowtable *ft, uint32_t *key, int keylen,
+    uint32_t fibnum)
 {
-	struct flowtable *ft, *fttail;
-	int i;
+	struct flist *flist;
+	struct flentry *fle;
+	uint32_t hash;
 
-	if (V_flow_hashjitter == 0)
-		V_flow_hashjitter = arc4random();
+	FLOWSTAT_INC(ft, ft_lookups);
 
-	KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
+	hash = jenkins_hash32(key, keylen / sizeof(uint32_t), flow_hashjitter);
 
-	ft = malloc(sizeof(struct flowtable),
-	    M_RTABLE, M_WAITOK | M_ZERO);
-
-	ft->ft_name = name;
-	ft->ft_flags = flags;
-	ft->ft_size = nentry;
-#ifdef RADIX_MPATH
-	ft->ft_rtalloc = rtalloc_mpath_fib;
-#else
-	ft->ft_rtalloc = rtalloc_ign_wrapper;
+	critical_enter();
+	flist = flowtable_list(ft, hash);
+	SLIST_FOREACH(fle, flist, f_next) {
+		KASSERT(fle->f_hash % ft->ft_size == hash % ft->ft_size,
+		    ("%s: wrong hash", __func__));
+		if (flow_matches(fle, key, keylen, fibnum)) {
+			fle->f_uptime = time_uptime;
+#ifdef FLOWTABLE_HASH_ALL
+			fle->f_flags |= fibnum >> 24;
 #endif
-	if (flags & FL_PCPU) {
-		ft->ft_lock = flowtable_pcpu_lock;
-		ft->ft_unlock = flowtable_pcpu_unlock;
-
-		for (i = 0; i <= mp_maxid; i++) {
-			ft->ft_table.pcpu[i] =
-			    malloc(nentry*sizeof(struct flentry *),
-				M_RTABLE, M_WAITOK | M_ZERO);
-			ft->ft_masks[i] = bit_alloc(nentry);
+			critical_exit();
+			FLOWSTAT_INC(ft, ft_hits);
+			return (fle);
 		}
-	} else {
-		ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
-		    (fls(mp_maxid + 1) << 1));
-		
-		ft->ft_lock = flowtable_global_lock;
-		ft->ft_unlock = flowtable_global_unlock;
-		ft->ft_table.global =
-			    malloc(nentry*sizeof(struct flentry *),
-				M_RTABLE, M_WAITOK | M_ZERO);
-		ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
-				M_RTABLE, M_WAITOK | M_ZERO);
-		for (i = 0; i < ft->ft_lock_count; i++)
-			mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
-
-		ft->ft_masks[0] = bit_alloc(nentry);
 	}
-	ft->ft_tmpmask = bit_alloc(nentry);
+	critical_exit();
 
-	/*
-	 * In the local transmit case the table truly is 
-	 * just a cache - so everything is eligible for
-	 * replacement after 5s of non-use
-	 */
-	if (flags & FL_HASH_ALL) {
-		ft->ft_udp_idle = V_flowtable_udp_expire;
-		ft->ft_syn_idle = V_flowtable_syn_expire;
-		ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
-		ft->ft_tcp_idle = V_flowtable_fin_wait_expire;
-	} else {
-		ft->ft_udp_idle = ft->ft_fin_wait_idle =
-		    ft->ft_syn_idle = ft->ft_tcp_idle = 30;
-		
-	}
+	FLOWSTAT_INC(ft, ft_misses);
 
-	/*
-	 * hook in to the cleaner list
-	 */
-	if (V_flow_list_head == NULL)
-		V_flow_list_head = ft;
-	else {
-		fttail = V_flow_list_head;
-		while (fttail->ft_next != NULL)
-			fttail = fttail->ft_next;
-		fttail->ft_next = ft;
-	}
-
-	return (ft);
+	return (flowtable_insert(ft, hash, key, keylen, fibnum));
 }
 
 /*
- * The rest of the code is devoted to garbage collection of expired entries.
- * It is a new additon made necessary by the switch to dynamically allocating
- * flow tables.
- * 
+ * used by the bit_alloc macro
  */
+#define calloc(count, size) malloc((count)*(size), M_FTABLE, M_WAITOK | M_ZERO)
 static void
-fle_free(struct flentry *fle, struct flowtable *ft)
+flowtable_alloc(struct flowtable *ft)
 {
-	struct rtentry *rt;
-	struct llentry *lle;
 
-	rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
-	lle = __DEVOLATILE(struct llentry *, fle->f_lle);
-	if (rt != NULL)
-		RTFREE(rt);
-	if (lle != NULL)
-		LLE_FREE(lle);
-	flow_free(fle, ft);
+	ft->ft_table = malloc(ft->ft_size * sizeof(struct flist),
+	    M_FTABLE, M_WAITOK);
+	for (int i = 0; i < ft->ft_size; i++)
+		ft->ft_table[i] = uma_zalloc(pcpu_zone_ptr, M_WAITOK | M_ZERO);
+
+	ft->ft_masks = uma_zalloc(pcpu_zone_ptr, M_WAITOK);
+	for (int i = 0; i < mp_ncpus; i++) {
+		bitstr_t **b;
+
+		b = zpcpu_get_cpu(ft->ft_masks, i);
+		*b = bit_alloc(ft->ft_size);
+	}
+	ft->ft_tmpmask = bit_alloc(ft->ft_size);
 }
+#undef calloc
 
 static void
-flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
+flowtable_free_stale(struct flowtable *ft, struct rtentry *rt, int maxidle)
 {
-	int curbit = 0, count;
-	struct flentry *fle,  **flehead, *fleprev;
-	struct flentry *flefreehead, *flefreetail, *fletmp;
+	struct flist *flist, freelist;
+	struct flentry *fle, *fle1, *fleprev;
 	bitstr_t *mask, *tmpmask;
-	struct flowtable_stats *fs = &ft->ft_stats[curcpu];
+	int curbit, tmpsize;
 
-	flefreehead = flefreetail = NULL;
+	SLIST_INIT(&freelist);
 	mask = flowtable_mask(ft);
 	tmpmask = ft->ft_tmpmask;
+	tmpsize = ft->ft_size;
 	memcpy(tmpmask, mask, ft->ft_size/8);
+	curbit = 0;
+	fleprev = NULL; /* pacify gcc */
 	/*
 	 * XXX Note to self, bit_ffs operates at the byte level
 	 * and thus adds gratuitous overhead
@@ -1425,129 +787,96 @@
 			break;
 		}
 
-		FL_ENTRY_LOCK(ft, curbit);
-		flehead = flowtable_entry(ft, curbit);
-		fle = fleprev = *flehead;
+		FLOWSTAT_INC(ft, ft_free_checks);
 
-		fs->ft_free_checks++;
+		critical_enter();
+		flist = flowtable_list(ft, curbit);
 #ifdef DIAGNOSTIC
-		if (fle == NULL && curbit > 0) {
+		if (SLIST_EMPTY(flist) && curbit > 0) {
 			log(LOG_ALERT,
 			    "warning bit=%d set, but no fle found\n",
 			    curbit);
 		}
-#endif		
-		while (fle != NULL) {
-			if (rt != NULL) {
-				if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
-					fleprev = fle;
-					fle = fle->f_next;
-					continue;
-				}
-			} else if (!flow_stale(ft, fle)) {
+#endif
+		SLIST_FOREACH_SAFE(fle, flist, f_next, fle1) {
+			if (rt != NULL && fle->f_rt != rt) {
 				fleprev = fle;
-				fle = fle->f_next;
 				continue;
 			}
-			/*
-			 * delete head of the list
-			 */
-			if (fleprev == *flehead) {
-				fletmp = fleprev;
-				if (fle == fleprev) {
-					fleprev = *flehead = fle->f_next;
-				} else
-					fleprev = *flehead = fle;
-				fle = fle->f_next;
-			} else {
-				/*
-				 * don't advance fleprev
-				 */
-				fletmp = fle;
-				fleprev->f_next = fle->f_next;
-				fle = fleprev->f_next;
+			if (!flow_stale(ft, fle, maxidle)) {
+				fleprev = fle;
+				continue;
 			}
 
-			if (flefreehead == NULL)
-				flefreehead = flefreetail = fletmp;
-			else {
-				flefreetail->f_next = fletmp;
-				flefreetail = fletmp;
-			}
-			fletmp->f_next = NULL;
+			if (fle == SLIST_FIRST(flist))
+				SLIST_REMOVE_HEAD(flist, f_next);
+			else
+				SLIST_REMOVE_AFTER(fleprev, f_next);
+			SLIST_INSERT_HEAD(&freelist, fle, f_next);
 		}
-		if (*flehead == NULL)
+		if (SLIST_EMPTY(flist))
 			bit_clear(mask, curbit);
-		FL_ENTRY_UNLOCK(ft, curbit);
+		critical_exit();
+
 		bit_clear(tmpmask, curbit);
-		bit_ffs(tmpmask, ft->ft_size, &curbit);
+		tmpmask += (curbit / 8);
+		tmpsize -= (curbit / 8) * 8;
+		bit_ffs(tmpmask, tmpsize, &curbit);
 	}
-	count = 0;
-	while ((fle = flefreehead) != NULL) {
-		flefreehead = fle->f_next;
-		count++;
-		fs->ft_frees++;
-		fle_free(fle, ft);
+
+	SLIST_FOREACH_SAFE(fle, &freelist, f_next, fle1) {
+		FLOWSTAT_INC(ft, ft_frees);
+		if (fle->f_rt != NULL)
+			RTFREE(fle->f_rt);
+		if (fle->f_lle != NULL)
+			LLE_FREE(fle->f_lle);
+		uma_zfree(flow_zone, fle);
 	}
-	if (V_flowtable_debug && count)
-		log(LOG_DEBUG, "freed %d flow entries\n", count);
 }
 
-void
-flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
+static void
+flowtable_clean_vnet(struct flowtable *ft, struct rtentry *rt, int maxidle)
 {
 	int i;
 
-	if (ft->ft_flags & FL_PCPU) {
-		CPU_FOREACH(i) {
-			if (smp_started == 1) {
-				thread_lock(curthread);
-				sched_bind(curthread, i);
-				thread_unlock(curthread);
-			}
+	CPU_FOREACH(i) {
+		if (smp_started == 1) {
+			thread_lock(curthread);
+			sched_bind(curthread, i);
+			thread_unlock(curthread);
+		}
 
-			flowtable_free_stale(ft, rt);
+		flowtable_free_stale(ft, rt, maxidle);
 
-			if (smp_started == 1) {
-				thread_lock(curthread);
-				sched_unbind(curthread);
-				thread_unlock(curthread);
-			}
+		if (smp_started == 1) {
+			thread_lock(curthread);
+			sched_unbind(curthread);
+			thread_unlock(curthread);
 		}
-	} else {
-		flowtable_free_stale(ft, rt);
 	}
 }
 
-static void
-flowtable_clean_vnet(void)
+void
+flowtable_route_flush(sa_family_t sa, struct rtentry *rt)
 {
 	struct flowtable *ft;
-	int i;
 
-	ft = V_flow_list_head;
-	while (ft != NULL) {
-		if (ft->ft_flags & FL_PCPU) {
-			CPU_FOREACH(i) {
-				if (smp_started == 1) {
-					thread_lock(curthread);
-					sched_bind(curthread, i);
-					thread_unlock(curthread);
-				}
+	switch (sa) {
+#ifdef INET
+	case AF_INET:
+		ft = &V_ip4_ft;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		ft = &V_ip6_ft;
+		break;
+#endif
+	default:
+		panic("%s: sa %d", __func__, sa);
+	}
 
-				flowtable_free_stale(ft, NULL);
-
-				if (smp_started == 1) {
-					thread_lock(curthread);
-					sched_unbind(curthread);
-					thread_unlock(curthread);
-				}
-			}
-		} else {
-			flowtable_free_stale(ft, NULL);
-		}
-		ft = ft->ft_next;
-	}
+	flowtable_clean_vnet(ft, rt, 0);
 }
 
 static void
@@ -1560,18 +889,33 @@
 		log(LOG_INFO, "flowtable cleaner started\n");
 	td = curthread;
 	while (1) {
+		uint32_t flowclean_freq, maxidle;
+
+		/*
+		 * The maximum idle time, as well as frequency are arbitrary.
+		 */
+		if (flow_full())
+			maxidle = 5;
+		else
+			maxidle = 30;
+
 		VNET_LIST_RLOCK();
 		VNET_FOREACH(vnet_iter) {
 			CURVNET_SET(vnet_iter);
-			flowtable_clean_vnet();
+#ifdef INET
+			flowtable_clean_vnet(&V_ip4_ft, NULL, maxidle);
+#endif
+#ifdef INET6
+			flowtable_clean_vnet(&V_ip6_ft, NULL, maxidle);
+#endif
 			CURVNET_RESTORE();
 		}
 		VNET_LIST_RUNLOCK();
 
-		/*
-		 * The 10 second interval between cleaning checks
-		 * is arbitrary
-		 */
+		if (flow_full())
+			flowclean_freq = 4*hz;
+		else
+			flowclean_freq = 20*hz;
 		mtx_lock(&flowclean_lock);
 		thread_lock(td);
 		sched_prio(td, PPAUSE);
@@ -1604,91 +948,106 @@
 };
 SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
 
-static void
-flowtable_init_vnet(const void *unused __unused)
+static int
+flowtable_get_size(char *name)
 {
+	int size;
 
-	V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
-	V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
-	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
-	V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
-	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);	
-	uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
-	uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
-	V_flowtable_ready = 1;
+	if (TUNABLE_INT_FETCH(name, &size)) {
+		if (size < 256)
+			size = 256;
+		if (!powerof2(size)) {
+			printf("%s must be power of 2\n", name);
+			size = 2048;
+		}
+	} else {
+		/*
+		 * round up to the next power of 2
+		 */
+		size = 1 << fls((1024 + maxusers * 64) - 1);
+	}
+
+	return (size);
 }
-VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
-    flowtable_init_vnet, NULL);
 
 static void
 flowtable_init(const void *unused __unused)
 {
 
+	flow_hashjitter = arc4random();
+
+	flow_zone = uma_zcreate("flows", sizeof(struct flentry),
+	    NULL, NULL, NULL, NULL, (64-1), UMA_ZONE_MAXBUCKET);
+	uma_zone_set_max(flow_zone, 1024 + maxusers * 64 * mp_ncpus);
+
 	cv_init(&flowclean_c_cv, "c_flowcleanwait");
 	cv_init(&flowclean_f_cv, "f_flowcleanwait");
 	mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
 	EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
 	    EVENTHANDLER_PRI_ANY);
-	flowclean_freq = 20*hz;
 }
-SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST,
+SYSINIT(flowtable_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
     flowtable_init, NULL);
 
+#ifdef INET
+static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip4, CTLFLAG_RD, NULL,
+    "Flowtable for IPv4");
 
-#ifdef VIMAGE
+static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip4_ftstat);
+VNET_PCPUSTAT_SYSINIT(ip4_ftstat);
+VNET_PCPUSTAT_SYSUNINIT(ip4_ftstat);
+SYSCTL_VNET_PCPUSTAT(_net_flowtable_ip4, OID_AUTO, stat, struct flowtable_stat,
+    ip4_ftstat, "Flowtable statistics for IPv4 "
+    "(struct flowtable_stat, net/flowtable.h)");
+
 static void
-flowtable_uninit(const void *unused __unused)
+flowtable_init_vnet_v4(const void *unused __unused)
 {
 
-	V_flowtable_ready = 0;
-	uma_zdestroy(V_flow_ipv4_zone);
-	uma_zdestroy(V_flow_ipv6_zone);
+	V_ip4_ft.ft_size = flowtable_get_size("net.flowtable.ip4.size");
+	V_ip4_ft.ft_stat = VNET(ip4_ftstat);
+	flowtable_alloc(&V_ip4_ft);
 }
+VNET_SYSINIT(ft_vnet_v4, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    flowtable_init_vnet_v4, NULL);
+#endif /* INET */
 
-VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
-    flowtable_uninit, NULL);
-#endif
+#ifdef INET6
+static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip6, CTLFLAG_RD, NULL,
+    "Flowtable for IPv6");
 
-#ifdef DDB
-static uint32_t *
-flowtable_get_hashkey(struct flentry *fle)
+static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip6_ftstat);
+VNET_PCPUSTAT_SYSINIT(ip6_ftstat);
+VNET_PCPUSTAT_SYSUNINIT(ip6_ftstat);
+SYSCTL_VNET_PCPUSTAT(_net_flowtable_ip6, OID_AUTO, stat, struct flowtable_stat,
+    ip6_ftstat, "Flowtable statistics for IPv6 "
+    "(struct flowtable_stat, net/flowtable.h)");
+
+static void
+flowtable_init_vnet_v6(const void *unused __unused)
 {
-	uint32_t *hashkey;
 
-	if (fle->f_flags & FL_IPV6)
-		hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
-	else
-		hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
-
-	return (hashkey);
+	V_ip6_ft.ft_size = flowtable_get_size("net.flowtable.ip6.size");
+	V_ip6_ft.ft_stat = VNET(ip6_ftstat);
+	flowtable_alloc(&V_ip6_ft);
 }
+VNET_SYSINIT(flowtable_init_vnet_v6, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    flowtable_init_vnet_v6, NULL);
+#endif /* INET6 */
 
+#ifdef DDB
 static bitstr_t *
 flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
 {
-	bitstr_t *mask;
 
-	if (ft->ft_flags & FL_PCPU)
-		mask = ft->ft_masks[cpuid];
-	else
-		mask = ft->ft_masks[0];
-
-	return (mask);
+	return (zpcpu_get_cpu(*ft->ft_masks, cpuid));
 }
 
-static struct flentry **
-flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
+static struct flist *
+flowtable_list_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
 {
-	struct flentry **fle;
-	int index = (hash % ft->ft_size);
 
-	if (ft->ft_flags & FL_PCPU) {
-		fle = &ft->ft_table.pcpu[cpuid][index];
-	} else {
-		fle = &ft->ft_table.global[index];
-	}
-	
-	return (fle);
+	return (zpcpu_get_cpu(&ft->ft_table[hash % ft->ft_size], cpuid));
 }
 
 static void
@@ -1696,40 +1055,58 @@
 {
 	int idle_time;
 	int rt_valid, ifp_valid;
-	uint16_t sport, dport;
-	uint32_t *hashkey;
-	char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
 	volatile struct rtentry *rt;
 	struct ifnet *ifp = NULL;
+	uint32_t *hashkey = fle->f_key;
 
 	idle_time = (int)(time_uptime - fle->f_uptime);
 	rt = fle->f_rt;
 	rt_valid = rt != NULL;
-	if (rt_valid) 
+	if (rt_valid)
 		ifp = rt->rt_ifp;
 	ifp_valid = ifp != NULL;
-	hashkey = flowtable_get_hashkey(fle);
-	if (fle->f_flags & FL_IPV6)
-		goto skipaddr;
 
-	inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
-	if (ft->ft_flags & FL_HASH_ALL) {
-		inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);		
-		sport = ntohs(((uint16_t *)hashkey)[0]);
-		dport = ntohs(((uint16_t *)hashkey)[1]);
-		db_printf("%s:%d->%s:%d",
-		    saddr, sport, daddr,
-		    dport);
-	} else 
+#ifdef INET
+	if (ft == &V_ip4_ft) {
+		char daddr[4*sizeof "123"];
+#ifdef FLOWTABLE_HASH_ALL
+		char saddr[4*sizeof "123"];
+		uint16_t sport, dport;
+#endif
+
+		inet_ntoa_r(*(struct in_addr *) &hashkey[0], daddr);
+#ifdef FLOWTABLE_HASH_ALL
+		inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);
+		dport = ntohs((uint16_t)(hashkey[2] >> 16));
+		sport = ntohs((uint16_t)(hashkey[2] & 0xffff));
+		db_printf("%s:%d->%s:%d", saddr, sport, daddr, dport);
+#else
 		db_printf("%s ", daddr);
-    
-skipaddr:
+#endif
+	}
+#endif /* INET */
+#ifdef INET6
+	if (ft == &V_ip6_ft) {
+#ifdef FLOWTABLE_HASH_ALL
+		db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
+		    hashkey[0], hashkey[1], hashkey[2],
+		    hashkey[3], hashkey[4], hashkey[5],
+		    hashkey[6], hashkey[7], hashkey[8]);
+#else
+		db_printf("\n\tkey=%08x:%08x:%08x ",
+		    hashkey[0], hashkey[1], hashkey[2]);
+#endif
+	}
+#endif /* INET6 */
+
+	db_printf("hash=%08x idle_time=%03d"
+	    "\n\tfibnum=%02d rt=%p",
+	    fle->f_hash, idle_time, fle->f_fibnum, fle->f_rt);
+
+#ifdef FLOWTABLE_HASH_ALL
 	if (fle->f_flags & FL_STALE)
 		db_printf(" FL_STALE ");
-	if (fle->f_flags & FL_TCP)
-		db_printf(" FL_TCP ");
-	if (fle->f_flags & FL_UDP)
-		db_printf(" FL_UDP ");
+#endif
 	if (rt_valid) {
 		if (rt->rt_flags & RTF_UP)
 			db_printf(" RTF_UP ");
@@ -1738,21 +1115,10 @@
 		if (ifp->if_flags & IFF_LOOPBACK)
 			db_printf(" IFF_LOOPBACK ");
 		if (ifp->if_flags & IFF_UP)
-			db_printf(" IFF_UP ");		
+			db_printf(" IFF_UP ");
 		if (ifp->if_flags & IFF_POINTOPOINT)
-			db_printf(" IFF_POINTOPOINT ");		
+			db_printf(" IFF_POINTOPOINT ");
 	}
-	if (fle->f_flags & FL_IPV6)
-		db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
-		    hashkey[0], hashkey[1], hashkey[2],
-		    hashkey[3], hashkey[4], hashkey[5],
-		    hashkey[6], hashkey[7], hashkey[8]);
-	else
-		db_printf("\n\tkey=%08x:%08x:%08x ",
-		    hashkey[0], hashkey[1], hashkey[2]);
-	db_printf("hash=%08x idle_time=%03d"
-	    "\n\tfibnum=%02d rt=%p",
-	    fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
 	db_printf("\n");
 }
 
@@ -1760,7 +1126,6 @@
 flowtable_show(struct flowtable *ft, int cpuid)
 {
 	int curbit = 0;
-	struct flentry *fle,  **flehead;
 	bitstr_t *mask, *tmpmask;
 
 	if (cpuid != -1)
@@ -1774,6 +1139,9 @@
 	 */
 	bit_ffs(tmpmask, ft->ft_size, &curbit);
 	while (curbit != -1) {
+		struct flist *flist;
+		struct flentry *fle;
+
 		if (curbit >= ft->ft_size || curbit < -1) {
 			db_printf("warning: bad curbit value %d \n",
 			    curbit);
@@ -1780,14 +1148,10 @@
 			break;
 		}
 
-		flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
-		fle = *flehead;
+		flist = flowtable_list_pcpu(ft, curbit, cpuid);
 
-		while (fle != NULL) {	
+		SLIST_FOREACH(fle, flist, f_next)
 			flow_show(ft, fle);
-			fle = fle->f_next;
-			continue;
-		}
 		bit_clear(tmpmask, curbit);
 		bit_ffs(tmpmask, ft->ft_size, &curbit);
 	}
@@ -1794,23 +1158,13 @@
 }
 
 static void
-flowtable_show_vnet(void)
+flowtable_show_vnet(struct flowtable *ft)
 {
-	struct flowtable *ft;
+
 	int i;
 
-	ft = V_flow_list_head;
-	while (ft != NULL) {
-		printf("name: %s\n", ft->ft_name);
-		if (ft->ft_flags & FL_PCPU) {
-			CPU_FOREACH(i) {
-				flowtable_show(ft, i);
-			}
-		} else {
-			flowtable_show(ft, -1);
-		}
-		ft = ft->ft_next;
-	}
+	CPU_FOREACH(i)
+		flowtable_show(ft, i);
 }
 
 DB_SHOW_COMMAND(flowtables, db_show_flowtables)
@@ -1822,7 +1176,14 @@
 #ifdef VIMAGE
 		db_printf("vnet %p\n", vnet_iter);
 #endif
-		flowtable_show_vnet();
+#ifdef INET
+		printf("IPv4:\n");
+		flowtable_show_vnet(&V_ip4_ft);
+#endif
+#ifdef INET6
+		printf("IPv6:\n");
+		flowtable_show_vnet(&V_ip6_ft);
+#endif
 		CURVNET_RESTORE();
 	}
 }

Modified: trunk/sys/net/flowtable.h
===================================================================
--- trunk/sys/net/flowtable.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/flowtable.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,84 +1,57 @@
 /* $MidnightBSD$ */
-/**************************************************************************
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius at FreeBSD.org>
+ * Copyright (c) 2008-2010, BitGravity Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Neither the name of the BitGravity Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/flowtable.h 262743 2014-03-04 15:14:47Z glebius $
+ *
+ */
 
-Copyright (c) 2008-2010, BitGravity Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the BitGravity Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-$FreeBSD: stable/9/sys/net/flowtable.h 208171 2010-05-16 21:48:39Z kmacy $
-
-***************************************************************************/
-
 #ifndef	_NET_FLOWTABLE_H_
 #define	_NET_FLOWTABLE_H_
 
+struct flowtable_stat {
+	uint64_t	ft_collisions;
+	uint64_t	ft_misses;
+	uint64_t	ft_free_checks;
+	uint64_t	ft_frees;
+	uint64_t	ft_hits;
+	uint64_t	ft_lookups;
+	uint64_t	ft_fail_lle_invalid;
+	uint64_t	ft_inserts;
+};
+
 #ifdef	_KERNEL
 
-#define	FL_HASH_ALL	(1<<0)	/* hash 4-tuple + protocol */
-#define	FL_PCPU		(1<<1)	/* pcpu cache */
-#define	FL_NOAUTO	(1<<2)	/* don't automatically add flentry on miss */
-#define FL_IPV6  	(1<<9)
-
-#define	FL_TCP		(1<<11)
-#define	FL_SCTP		(1<<12)
-#define	FL_UDP		(1<<13)
-#define	FL_DEBUG	(1<<14)
-#define	FL_DEBUG_ALL	(1<<15)
-
-struct flowtable;
-struct flentry;
-struct route;
-struct route_in6;
-
-VNET_DECLARE(struct flowtable *, ip_ft);
-#define	V_ip_ft			VNET(ip_ft)
-
-VNET_DECLARE(struct flowtable *, ip6_ft);
-#define	V_ip6_ft		VNET(ip6_ft)
-
-struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
-
 /*
- * Given a flow table, look up the L3 and L2 information and
- * return it in the route.
- *
+ * Given a flow table, look up the L3 and L2 information
+ * and return it in the route.
  */
-struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
+int flowtable_lookup(sa_family_t, struct mbuf *, struct route *);
+void flowtable_route_flush(sa_family_t, struct rtentry *);
 
-struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
-    struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
-
-int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
-    struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
-
-void flow_invalidate(struct flentry *fl);
-void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
-
-void flow_to_route(struct flentry *fl, struct route *ro);
-
-void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
-
-
 #endif /* _KERNEL */
-#endif
+#endif /* !_NET_FLOWTABLE_H_ */

Modified: trunk/sys/net/ieee8023ad_lacp.c
===================================================================
--- trunk/sys/net/ieee8023ad_lacp.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ieee8023ad_lacp.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/ieee8023ad_lacp.c 237669 2012-06-27 22:06:42Z thompsa $");
+__FBSDID("$FreeBSD: stable/10/sys/net/ieee8023ad_lacp.c 313039 2017-02-01 04:54:23Z rpokala $");
 
 #include <sys/param.h>
 #include <sys/callout.h>
@@ -188,30 +188,37 @@
 static void	lacp_dprintf(const struct lacp_port *, const char *, ...)
 		    __attribute__((__format__(__printf__, 2, 3)));
 
-static int lacp_debug = 0;
-SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN,
-    &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)");
-TUNABLE_INT("net.lacp_debug", &lacp_debug);
+static VNET_DEFINE(int, lacp_debug);
+#define	V_lacp_debug	VNET(lacp_debug)
+SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad");
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
+    &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
 
-#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; }
-#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); }
+static VNET_DEFINE(int, lacp_default_strict_mode) = 1;
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN,
+    &VNET_NAME(lacp_default_strict_mode), 0,
+    "LACP strict protocol compliance default");
 
+#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; }
+#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); }
+#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; }
+
 /*
  * partner administration variables.
  * XXX should be configurable.
  */
 
-static const struct lacp_peerinfo lacp_partner_admin = {
+static const struct lacp_peerinfo lacp_partner_admin_optimistic = {
 	.lip_systemid = { .lsi_prio = 0xffff },
 	.lip_portid = { .lpi_prio = 0xffff },
-#if 1
-	/* optimistic */
 	.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
 	    LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
-#else
-	/* pessimistic */
+};
+
+static const struct lacp_peerinfo lacp_partner_admin_strict = {
+	.lip_systemid = { .lsi_prio = 0xffff },
+	.lip_portid = { .lpi_prio = 0xffff },
 	.lip_state = 0,
-#endif
 };
 
 static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
@@ -297,11 +304,16 @@
 		goto bad;
 	}
 
-        if (lacp_debug > 0) {
+        if (V_lacp_debug > 0) {
 		lacp_dprintf(lp, "lacpdu receive\n");
 		lacp_dump_lacpdu(du);
 	}
 
+	if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) {
+		LACP_TPRINTF((lp, "Dropping RX PDU\n"));
+		goto bad;
+	}
+
 	LACP_LOCK(lsc);
 	lacp_sm_rx(lp, du);
 	LACP_UNLOCK(lsc);
@@ -349,7 +361,7 @@
 
 	LACP_LOCK_ASSERT(lp->lp_lsc);
 
-	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		return (ENOMEM);
 	}
@@ -377,7 +389,7 @@
 	    sizeof(du->ldu_collector));
 	du->ldu_collector.lci_maxdelay = 0;
 
-	if (lacp_debug > 0) {
+	if (V_lacp_debug > 0) {
 		lacp_dprintf(lp, "lacpdu transmit\n");
 		lacp_dump_lacpdu(du);
 	}
@@ -403,7 +415,7 @@
 
 	LACP_LOCK_ASSERT(lp->lp_lsc);
 
-	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		return (ENOMEM);
 	}
@@ -489,6 +501,7 @@
 		if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
 			continue;
 
+		CURVNET_SET(lp->lp_ifp->if_vnet);
 		lacp_run_timers(lp);
 
 		lacp_select(lp);
@@ -495,6 +508,7 @@
 		lacp_sm_mux(lp);
 		lacp_sm_tx(lp);
 		lacp_sm_ptx_tx_schedule(lp);
+		CURVNET_RESTORE();
 	}
 	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
 }
@@ -510,9 +524,6 @@
 	struct ifmultiaddr *rifma = NULL;
 	int error;
 
-	boolean_t active = TRUE; /* XXX should be configurable */
-	boolean_t fast = FALSE; /* XXX should be configurable */
-
 	bzero((char *)&sdl, sizeof(sdl));
 	sdl.sdl_len = sizeof(sdl);
 	sdl.sdl_family = AF_LINK;
@@ -544,9 +555,7 @@
 
 	lacp_fill_actorinfo(lp, &lp->lp_actor);
 	lacp_fill_markerinfo(lp, &lp->lp_marker);
-	lp->lp_state =
-	    (active ? LACP_STATE_ACTIVITY : 0) |
-	    (fast ? LACP_STATE_TIMEOUT : 0);
+	lp->lp_state = LACP_STATE_ACTIVITY;
 	lp->lp_aggregator = NULL;
 	lacp_sm_rx_set_expired(lp);
 	LACP_UNLOCK(lsc);
@@ -571,12 +580,13 @@
 	lacp_disable_distributing(lp);
 	lacp_unselect(lp);
 
+	LIST_REMOVE(lp, lp_next);
+	LACP_UNLOCK(lsc);
+
 	/* The address may have already been removed by if_purgemaddrs() */
 	if (!lgp->lp_detaching)
 		if_delmulti_ifma(lp->lp_ifma);
 
-	LIST_REMOVE(lp, lp_next);
-	LACP_UNLOCK(lsc);
 	free(lp, M_DEVBUF);
 }
 
@@ -585,10 +595,20 @@
 {
 	struct lacp_opreq *req = (struct lacp_opreq *)data;
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
-	struct lacp_aggregator *la = lsc->lsc_active_aggregator;
+	struct lacp_aggregator *la;
 
+	bzero(req, sizeof(struct lacp_opreq));
+	
+	/* 
+	 * If the LACP softc is NULL, return with the opreq structure full of
+	 * zeros.  It is normal for the softc to be NULL while the lagg is
+	 * being destroyed.
+	 */
+	if (NULL == lsc)
+		return;
+
+	la = lsc->lsc_active_aggregator;
 	LACP_LOCK(lsc);
-	bzero(req, sizeof(struct lacp_opreq));
 	if (la != NULL) {
 		req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio);
 		memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac,
@@ -654,6 +674,7 @@
 {
 	struct lacp_aggregator *la = lp->lp_aggregator;
 	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lagg_softc *sc = lsc->lsc_softc;
 	char buf[LACP_LAGIDSTR_MAX+1];
 
 	LACP_LOCK_ASSERT(lsc);
@@ -673,6 +694,7 @@
 
 	TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
 	la->la_nports--;
+	sc->sc_active = la->la_nports;
 
 	if (lsc->lsc_active_aggregator == la) {
 		lacp_suppress_distributing(lsc, la);
@@ -689,6 +711,7 @@
 {
 	struct lacp_aggregator *la = lp->lp_aggregator;
 	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lagg_softc *sc = lsc->lsc_softc;
 	char buf[LACP_LAGIDSTR_MAX+1];
 
 	LACP_LOCK_ASSERT(lsc);
@@ -705,6 +728,7 @@
 	KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
 	TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
 	la->la_nports++;
+	sc->sc_active = la->la_nports;
 
 	lp->lp_state |= LACP_STATE_DISTRIBUTING;
 
@@ -723,20 +747,19 @@
 
 	LACP_LOCK_ASSERT(lsc);
 
+	CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet);
 	LACP_TRACE(NULL);
+	CURVNET_RESTORE();
 
 	lsc->lsc_suppress_distributing = FALSE;
 }
 
-int
+void
 lacp_attach(struct lagg_softc *sc)
 {
 	struct lacp_softc *lsc;
 
-	lsc = malloc(sizeof(struct lacp_softc),
-	    M_DEVBUF, M_NOWAIT|M_ZERO);
-	if (lsc == NULL)
-		return (ENOMEM);
+	lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO);
 
 	sc->sc_psc = (caddr_t)lsc;
 	lsc->lsc_softc = sc;
@@ -743,6 +766,7 @@
 
 	lsc->lsc_hashkey = arc4random();
 	lsc->lsc_active_aggregator = NULL;
+	lsc->lsc_strict_mode = VNET(lacp_default_strict_mode);
 	LACP_LOCK_INIT(lsc);
 	TAILQ_INIT(&lsc->lsc_aggregators);
 	LIST_INIT(&lsc->lsc_ports);
@@ -753,14 +777,12 @@
 	/* if the lagg is already up then do the same */
 	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
 		lacp_init(sc);
-
-	return (0);
 }
 
 int
-lacp_detach(struct lagg_softc *sc)
+lacp_detach(void *psc)
 {
-	struct lacp_softc *lsc = LACP_SOFTC(sc);
+	struct lacp_softc *lsc = (struct lacp_softc *)psc;
 
 	KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
 	    ("aggregators still active"));
@@ -767,7 +789,6 @@
 	KASSERT(lsc->lsc_active_aggregator == NULL,
 	    ("aggregator still attached"));
 
-	sc->sc_psc = NULL;
 	callout_drain(&lsc->lsc_transit_callout);
 	callout_drain(&lsc->lsc_callout);
 
@@ -816,8 +837,9 @@
 		return (NULL);
 	}
 
-	if (sc->use_flowid && (m->m_flags & M_FLOWID))
-		hash = m->m_pkthdr.flowid;
+	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+	    M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+		hash = m->m_pkthdr.flowid >> sc->flowid_shift;
 	else
 		hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
 	hash %= pm->pm_count;
@@ -909,7 +931,6 @@
 static void
 lacp_select_active_aggregator(struct lacp_softc *lsc)
 {
-	struct lagg_softc *sc = lsc->lsc_softc;
 	struct lacp_aggregator *la;
 	struct lacp_aggregator *best_la = NULL;
 	uint64_t best_speed = 0;
@@ -961,7 +982,6 @@
 	    lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
 
 	if (lsc->lsc_active_aggregator != best_la) {
-		sc->sc_ifp->if_baudrate = best_speed;
 		lsc->lsc_active_aggregator = best_la;
 		lacp_update_portmap(lsc);
 		if (best_la) {
@@ -977,9 +997,11 @@
 static void
 lacp_update_portmap(struct lacp_softc *lsc)
 {
+	struct lagg_softc *sc = lsc->lsc_softc;
 	struct lacp_aggregator *la;
 	struct lacp_portmap *p;
 	struct lacp_port *lp;
+	uint64_t speed;
 	u_int newmap;
 	int i;
 
@@ -986,6 +1008,7 @@
 	newmap = lsc->lsc_activemap == 0 ? 1 : 0;
 	p = &lsc->lsc_pmap[newmap];
 	la = lsc->lsc_active_aggregator;
+	speed = 0;
 	bzero(p, sizeof(struct lacp_portmap));
 
 	if (la != NULL && la->la_nports > 0) {
@@ -994,7 +1017,9 @@
 		TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
 			p->pm_map[i++] = lp;
 		KASSERT(i == p->pm_count, ("Invalid port count"));
+		speed = lacp_aggregator_bandwidth(la);
 	}
+	sc->sc_ifp->if_baudrate = speed;
 
 	/* switch the active portmap over */
 	atomic_store_rel_int(&lsc->lsc_activemap, newmap);
@@ -1029,8 +1054,87 @@
 		KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
 		KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
 
-		/* bit 0..4:	IFM_SUBTYPE */
-		key = subtype;
+		/* bit 0..4:	IFM_SUBTYPE modulo speed */
+		switch (subtype) {
+		case IFM_10_T:
+		case IFM_10_2:
+		case IFM_10_5:
+		case IFM_10_STP:
+		case IFM_10_FL:
+			key = IFM_10_T;
+			break;
+		case IFM_100_TX:
+		case IFM_100_FX:
+		case IFM_100_T4:
+		case IFM_100_VG:
+		case IFM_100_T2:
+		case IFM_100_T:
+			key = IFM_100_TX;
+			break;
+		case IFM_1000_SX:
+		case IFM_1000_LX:
+		case IFM_1000_CX:
+		case IFM_1000_T:
+		case IFM_1000_KX:
+		case IFM_1000_SGMII:
+		case IFM_1000_CX_SGMII:
+			key = IFM_1000_SX;
+			break;
+		case IFM_10G_LR:
+		case IFM_10G_SR:
+		case IFM_10G_CX4:
+		case IFM_10G_TWINAX:
+		case IFM_10G_TWINAX_LONG:
+		case IFM_10G_LRM:
+		case IFM_10G_T:
+		case IFM_10G_KX4:
+		case IFM_10G_KR:
+		case IFM_10G_CR1:
+		case IFM_10G_ER:
+		case IFM_10G_SFI:
+			key = IFM_10G_LR;
+			break;
+		case IFM_20G_KR2:
+			key = IFM_20G_KR2;
+			break;
+		case IFM_2500_KX:
+		case IFM_2500_T:
+			key = IFM_2500_KX;
+			break;
+		case IFM_5000_T:
+			key = IFM_5000_T;
+			break;
+		case IFM_50G_PCIE:
+		case IFM_50G_CR2:
+		case IFM_50G_KR2:
+			key = IFM_50G_PCIE;
+			break;
+		case IFM_56G_R4:
+			key = IFM_56G_R4;
+			break;
+		case IFM_25G_PCIE:
+		case IFM_25G_CR:
+		case IFM_25G_KR:
+		case IFM_25G_SR:
+			key = IFM_25G_PCIE;
+			break;
+		case IFM_40G_CR4:
+		case IFM_40G_SR4:
+		case IFM_40G_LR4:
+		case IFM_40G_XLPPI:
+		case IFM_40G_KR4:
+			key = IFM_40G_CR4;
+			break;
+		case IFM_100G_CR4:
+		case IFM_100G_SR4:
+		case IFM_100G_KR4:
+		case IFM_100G_LR4:
+			key = IFM_100G_CR4;
+			break;
+		default:
+			key = subtype;
+			break;
+		}
 		/* bit 5..14:	(some bits of) if_index of lagg device */
 		key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
 		/* bit 15:	0 */
@@ -1265,6 +1369,8 @@
 static void
 lacp_sm_mux(struct lacp_port *lp)
 {
+	struct lagg_port *lgp = lp->lp_lagg;
+	struct lagg_softc *sc = lgp->lp_softc;
 	enum lacp_mux_state new_state;
 	boolean_t p_sync =
 		    (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
@@ -1273,8 +1379,10 @@
 	enum lacp_selected selected = lp->lp_selected;
 	struct lacp_aggregator *la;
 
-	if (lacp_debug > 1)
-		lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state);
+	if (V_lacp_debug > 1)
+		lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, "
+		    "p_sync= 0x%x, p_collecting= 0x%x\n", __func__,
+		    lp->lp_mux_state, selected, p_sync, p_collecting);
 
 re_eval:
 	la = lp->lp_aggregator;
@@ -1314,6 +1422,8 @@
 	case LACP_MUX_DISTRIBUTING:
 		if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
 			new_state = LACP_MUX_COLLECTING;
+			lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n");
+			sc->sc_flapping++;
 		}
 		break;
 	default:
@@ -1562,6 +1672,10 @@
 		    sizeof(buf))));
 	}
 
+	/* XXX Hack, still need to implement 5.4.9 para 2,3,4 */
+	if (lp->lp_lsc->lsc_strict_mode)
+		lp->lp_partner.lip_state |= LACP_STATE_SYNC;
+
 	lacp_sm_ptx_update_timeout(lp, oldpstate);
 }
 
@@ -1587,7 +1701,10 @@
 	LACP_TRACE(lp);
 
 	oldpstate = lp->lp_partner.lip_state;
-	lp->lp_partner = lacp_partner_admin;
+	if (lp->lp_lsc->lsc_strict_mode)
+		lp->lp_partner = lacp_partner_admin_strict;
+	else
+		lp->lp_partner = lacp_partner_admin_optimistic;
 	lp->lp_state |= LACP_STATE_DEFAULTED;
 	lacp_sm_ptx_update_timeout(lp, oldpstate);
 }
@@ -1622,7 +1739,12 @@
 
 	LACP_TRACE(lp);
 
-	lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+	if (lp->lp_lsc->lsc_strict_mode)
+		lacp_sm_rx_update_selected_from_peerinfo(lp,
+		    &lacp_partner_admin_strict);
+	else
+		lacp_sm_rx_update_selected_from_peerinfo(lp,
+		    &lacp_partner_admin_optimistic);
 }
 
 /* transmit machine */
@@ -1630,7 +1752,7 @@
 static void
 lacp_sm_tx(struct lacp_port *lp)
 {
-	int error;
+	int error = 0;
 
 	if (!(lp->lp_state & LACP_STATE_AGGREGATION)
 #if 1
@@ -1652,7 +1774,11 @@
 		return;
 	}
 
-	error = lacp_xmit_lacpdu(lp);
+	if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) {
+		error = lacp_xmit_lacpdu(lp);
+	} else {
+		LACP_TPRINTF((lp, "Dropping TX PDU\n"));
+	}
 
 	if (error == 0) {
 		lp->lp_flags &= ~LACP_PORT_NTT;

Modified: trunk/sys/net/ieee8023ad_lacp.h
===================================================================
--- trunk/sys/net/ieee8023ad_lacp.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ieee8023ad_lacp.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/ieee8023ad_lacp.h 177289 2008-03-17 01:26:44Z thompsa $
+ * $FreeBSD: stable/10/sys/net/ieee8023ad_lacp.h 287808 2015-09-15 05:19:10Z hiren $
  */
 
 /*
@@ -76,6 +76,7 @@
 	"\007DEFAULTED"		\
 	"\010EXPIRED"
 
+#ifdef _KERNEL
 /*
  * IEEE802.3 slow protocols
  *
@@ -246,6 +247,12 @@
 	struct lacp_portmap	lsc_pmap[2];
 	volatile u_int		lsc_activemap;
 	u_int32_t		lsc_hashkey;
+	struct {
+		u_int32_t	lsc_rx_test;
+		u_int32_t	lsc_tx_test;
+	} lsc_debug;
+	u_int32_t		lsc_strict_mode;
+	boolean_t		lsc_fast_timeout; /* if set, fast timeout */
 };
 
 #define	LACP_TYPE_ACTORINFO	1
@@ -278,8 +285,8 @@
 
 struct mbuf	*lacp_input(struct lagg_port *, struct mbuf *);
 struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-int		lacp_attach(struct lagg_softc *);
-int		lacp_detach(struct lagg_softc *);
+void		lacp_attach(struct lagg_softc *);
+int		lacp_detach(void *);
 void		lacp_init(struct lagg_softc *);
 void		lacp_stop(struct lagg_softc *);
 int		lacp_port_create(struct lagg_port *);
@@ -332,3 +339,4 @@
 #define	LACP_LAGIDSTR_MAX	\
 	(1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
 #define	LACP_STATESTR_MAX	(255) /* XXX */
+#endif	/* _KERNEL */

Modified: trunk/sys/net/if.c
===================================================================
--- trunk/sys/net/if.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if.c	8.5 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/if.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if.c 333106 2018-04-30 08:39:23Z royger $
  */
 
 #include "opt_compat.h"
@@ -75,18 +75,19 @@
 #include <net/vnet.h>
 
 #if defined(INET) || defined(INET6)
-/*XXX*/
+#include <net/ethernet.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/ip.h>
 #include <netinet/ip_carp.h>
+#ifdef INET
+#include <netinet/if_ether.h>
+#endif /* INET */
 #ifdef INET6
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
-#endif
-#endif
-#ifdef INET
-#include <netinet/if_ether.h>
-#endif
+#endif /* INET6 */
+#endif /* INET || INET6 */
 
 #include <security/mac/mac_framework.h>
 
@@ -113,6 +114,14 @@
 	&log_link_state_change, 0,
 	"log interface link state change events");
 
+/* Log promiscuous mode change events */
+static int log_promisc_mode_change = 1;
+
+TUNABLE_INT("net.link.log_promisc_mode_change", &log_promisc_mode_change);
+SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
+	&log_promisc_mode_change, 1,
+	"log promiscuous mode change events");
+
 /* Interface description */
 static unsigned int ifdescr_maxlen = 1024;
 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
@@ -130,18 +139,22 @@
 void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
 /* These are external hooks for CARP. */
 void	(*carp_linkstate_p)(struct ifnet *ifp);
+void	(*carp_demote_adj_p)(int, char *);
+int	(*carp_master_p)(struct ifaddr *);
 #if defined(INET) || defined(INET6)
-struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
+int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
 int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
-    struct sockaddr *sa, struct rtentry *rt);
+    const struct sockaddr *sa);
+int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);   
+int	(*carp_attach_p)(struct ifaddr *, int);
+void	(*carp_detach_p)(struct ifaddr *);
 #endif
 #ifdef INET
-int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *,
-    u_int8_t **);
+int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
 #endif
 #ifdef INET6
 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
-caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
+caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
     const struct in6_addr *taddr);
 #endif
 
@@ -158,6 +171,7 @@
 static void	if_freemulti(struct ifmultiaddr *);
 static void	if_init(void *);
 static void	if_grow(void);
+static void	if_input_default(struct ifnet *, struct mbuf *);
 static void	if_route(struct ifnet *, int flag, int fam);
 static int	if_setflag(struct ifnet *, int, int, int *, int);
 static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
@@ -170,8 +184,8 @@
 static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
 static int	if_getgroupmembers(struct ifgroupreq *);
 static void	if_delgroups(struct ifnet *);
-static void	if_attach_internal(struct ifnet *, int);
-static void	if_detach_internal(struct ifnet *, int);
+static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
+static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
 
 #ifdef INET6
 /*
@@ -334,11 +348,12 @@
 struct ifaddr *
 ifaddr_byindex(u_short idx)
 {
-	struct ifaddr *ifa;
+	struct ifnet *ifp;
+	struct ifaddr *ifa = NULL;
 
 	IFNET_RLOCK_NOSLEEP();
-	ifa = ifnet_byindex_locked(idx)->if_addr;
-	if (ifa != NULL)
+	ifp = ifnet_byindex_locked(idx);
+	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
 		ifa_ref(ifa);
 	IFNET_RUNLOCK_NOSLEEP();
 	return (ifa);
@@ -454,7 +469,6 @@
 	ifp->if_afdata_initialized = 0;
 	IF_AFDATA_LOCK_INIT(ifp);
 	TAILQ_INIT(&ifp->if_addrhead);
-	TAILQ_INIT(&ifp->if_prefixhead);
 	TAILQ_INIT(&ifp->if_multiaddrs);
 	TAILQ_INIT(&ifp->if_groups);
 #ifdef MAC
@@ -495,21 +509,15 @@
 }
 
 /*
- * This version should only be called by intefaces that switch their type
- * after calling if_alloc().  if_free_type() will go away again now that we
- * have if_alloctype to cache the original allocation type.  For now, assert
- * that they match, since we require that in practice.
+ * Deregister an interface and free the associated storage.
  */
 void
-if_free_type(struct ifnet *ifp, u_char type)
+if_free(struct ifnet *ifp)
 {
 
-	KASSERT(ifp->if_alloctype == type,
-	    ("if_free_type: type (%d) != alloctype (%d)", type,
-	    ifp->if_alloctype));
-
 	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
 
+	CURVNET_SET_QUIET(ifp->if_vnet);
 	IFNET_WLOCK();
 	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
 	    ("%s: freeing unallocated ifnet", ifp->if_xname));
@@ -517,24 +525,12 @@
 	ifindex_free_locked(ifp->if_index);
 	IFNET_WUNLOCK();
 
-	if (!refcount_release(&ifp->if_refcount))
-		return;
-	if_free_internal(ifp);
+	if (refcount_release(&ifp->if_refcount))
+		if_free_internal(ifp);
+	CURVNET_RESTORE();
 }
 
 /*
- * This is the normal version of if_free(), used by device drivers to free a
- * detached network interface.  The contents of if_free_type() will move into
- * here when if_free_type() goes away.
- */
-void
-if_free(struct ifnet *ifp)
-{
-
-	if_free_type(ifp, ifp->if_alloctype);
-}
-
-/*
  * Interfaces to keep an ifnet type-stable despite the possibility of the
  * driver calling if_free().  If there are additional references, we defer
  * freeing the underlying data structure.
@@ -585,6 +581,15 @@
  * tasks, given that we are moving from one vnet to another an ifnet which
  * has already been fully initialized.
  *
+ * Note that if_detach_internal() removes group membership unconditionally
+ * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
+ * Thus, when if_vmove() is applied to a cloned interface, group membership
+ * is lost while a cloned one always joins a group whose name is
+ * ifc->ifc_name.  To recover this after if_detach_internal() and
+ * if_attach_internal(), the cloner should be specified to
+ * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
+ * attempts to join a group whose name is ifc->ifc_name.
+ *
  * XXX:
  *  - The decision to return void and thus require this function to
  *    succeed is questionable.
@@ -595,11 +600,62 @@
 if_attach(struct ifnet *ifp)
 {
 
-	if_attach_internal(ifp, 0);
+	if_attach_internal(ifp, 0, NULL);
 }
 
+/*
+ * Compute the least common TSO limit.
+ */
+void
+if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+{
+	/*
+	 * 1) If there is no limit currently, take the limit from
+	 * the network adapter.
+	 *
+	 * 2) If the network adapter has a limit below the current
+	 * limit, apply it.
+	 */
+	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
+	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
+		pmax->tsomaxbytes = ifp->if_hw_tsomax;
+	}
+	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
+	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
+		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+	}
+	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
+	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
+		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
+	}
+}
+
+/*
+ * Update TSO limit of a network adapter.
+ *
+ * Returns zero if no change. Else non-zero.
+ */
+int
+if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+{
+	int retval = 0;
+	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
+		ifp->if_hw_tsomax = pmax->tsomaxbytes;
+		retval++;
+	}
+	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
+		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
+		retval++;
+	}
+	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
+		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
+		retval++;
+	}
+	return (retval);
+}
+
 static void
-if_attach_internal(struct ifnet *ifp, int vmove)
+if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
 {
 	unsigned socksize, ifasize;
 	int namelen, masklen;
@@ -618,6 +674,10 @@
 
 	if_addgroup(ifp, IFG_ALL);
 
+	/* Restore group membership for cloned interfaces. */
+	if (vmove && ifc != NULL)
+		if_clone_addgroup(ifp, ifc);
+
 	getmicrotime(&ifp->if_lastchange);
 	ifp->if_data.ifi_epoch = time_uptime;
 	ifp->if_data.ifi_datalen = sizeof(struct if_data);
@@ -629,7 +689,9 @@
 		ifp->if_transmit = if_transmit;
 		ifp->if_qflush = if_qflush;
 	}
-	
+	if (ifp->if_input == NULL)
+		ifp->if_input = if_input_default;
+
 	if (!vmove) {
 #ifdef MAC
 		mac_ifnet_create(ifp);
@@ -670,6 +732,37 @@
 		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
 		/* Reliably crash if used uninitialized. */
 		ifp->if_broadcastaddr = NULL;
+
+		if (ifp->if_type == IFT_ETHER) {
+			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
+			    M_WAITOK | M_ZERO);
+		}
+
+#if defined(INET) || defined(INET6)
+		/* Use defaults for TSO, if nothing is set */
+		if (ifp->if_hw_tsomax == 0 &&
+		    ifp->if_hw_tsomaxsegcount == 0 &&
+		    ifp->if_hw_tsomaxsegsize == 0) {
+			/*
+			 * The TSO defaults needs to be such that an
+			 * NFS mbuf list of 35 mbufs totalling just
+			 * below 64K works and that a chain of mbufs
+			 * can be defragged into at most 32 segments:
+			 */
+			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
+			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
+			ifp->if_hw_tsomaxsegcount = 35;
+			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */
+
+			/* XXX some drivers set IFCAP_TSO after ethernet attach */
+			if (ifp->if_capabilities & IFCAP_TSO) {
+				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
+				    ifp->if_hw_tsomax,
+				    ifp->if_hw_tsomaxsegcount,
+				    ifp->if_hw_tsomaxsegsize);
+			}
+		}
+#endif
 	}
 #ifdef VIMAGE
 	else {
@@ -709,12 +802,9 @@
 if_attachdomain(void *dummy)
 {
 	struct ifnet *ifp;
-	int s;
 
-	s = splnet();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
 		if_attachdomain1(ifp);
-	splx(s);
 }
 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
     if_attachdomain, NULL);
@@ -723,23 +813,17 @@
 if_attachdomain1(struct ifnet *ifp)
 {
 	struct domain *dp;
-	int s;
 
-	s = splnet();
-
 	/*
 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
 	 * cannot lock ifp->if_afdata initialization, entirely.
 	 */
-	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
-		splx(s);
+	if (IF_AFDATA_TRYLOCK(ifp) == 0)
 		return;
-	}
 	if (ifp->if_afdata_initialized >= domain_init_status) {
 		IF_AFDATA_UNLOCK(ifp);
-		splx(s);
-		printf("if_attachdomain called more than once on %s\n",
-		    ifp->if_xname);
+		log(LOG_WARNING, "%s called more than once on %s\n",
+		    __func__, ifp->if_xname);
 		return;
 	}
 	ifp->if_afdata_initialized = domain_init_status;
@@ -752,8 +836,6 @@
 			ifp->if_afdata[dp->dom_family] =
 			    (*dp->dom_ifattach)(ifp);
 	}
-
-	splx(s);
 }
 
 /*
@@ -822,11 +904,13 @@
 if_detach(struct ifnet *ifp)
 {
 
-	if_detach_internal(ifp, 0);
+	CURVNET_SET_QUIET(ifp->if_vnet);
+	if_detach_internal(ifp, 0, NULL);
+	CURVNET_RESTORE();
 }
 
-static void
-if_detach_internal(struct ifnet *ifp, int vmove)
+static int
+if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
 {
 	struct ifaddr *ifa;
 	struct radix_node_head	*rnh;
@@ -848,13 +932,25 @@
 #endif
 	IFNET_WUNLOCK();
 	if (!found) {
+		/*
+		 * While we would want to panic here, we cannot
+		 * guarantee that the interface is indeed still on
+		 * the list given we don't hold locks all the way.
+		 */
+		return (ENOENT);
+#if 0
 		if (vmove)
 			panic("%s: ifp=%p not on the ifnet tailq %p",
 			    __func__, ifp, &V_ifnet);
 		else
 			return; /* XXX this should panic as well? */
+#endif
 	}
 
+	/* Check if this is a cloned interface or not. */
+	if (vmove && ifcp != NULL)
+		*ifcp = if_clone_findifc(ifp);
+
 	/*
 	 * Remove/wait for pending events.
 	 */
@@ -888,6 +984,12 @@
 #endif
 	if_purgemaddrs(ifp);
 
+	/* Announce that the interface is gone. */
+	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
+	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+	if (IS_DEFAULT_VNET(curvnet))
+		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
+
 	if (!vmove) {
 		/*
 		 * Prevent further calls into the device driver via ifnet.
@@ -898,6 +1000,8 @@
 		 * Remove link ifaddr pointer and maybe decrement if_index.
 		 * Clean up all addresses.
 		 */
+		free(ifp->if_hw_addr, M_IFADDR);
+		ifp->if_hw_addr = NULL;
 		ifp->if_addr = NULL;
 
 		/* We can now free link ifaddr. */
@@ -925,11 +1029,6 @@
 		}
 	}
 
-	/* Announce that the interface is gone. */
-	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
-	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
-	if (IS_DEFAULT_VNET(curvnet))
-		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
 	if_delgroups(ifp);
 
 	/*
@@ -946,6 +1045,8 @@
 			(*dp->dom_ifdetach)(ifp,
 			    ifp->if_afdata[dp->dom_family]);
 	}
+
+	return (0);
 }
 
 #ifdef VIMAGE
@@ -960,12 +1061,17 @@
 if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
 {
 	u_short idx;
+	struct if_clone *ifc;
+	int rc;
 
 	/*
 	 * Detach from current vnet, but preserve LLADDR info, do not
 	 * mark as dead etc. so that the ifnet can be reattached later.
+	 * If we cannot find it, we lost the race to someone else.
 	 */
-	if_detach_internal(ifp, 1);
+	rc = if_detach_internal(ifp, 1, &ifc);
+	if (rc != 0)
+		return;
 
 	/*
 	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
@@ -999,7 +1105,7 @@
 	ifnet_setbyindex_locked(ifp->if_index, ifp);
 	IFNET_WUNLOCK();
 
-	if_attach_internal(ifp, 1);
+	if_attach_internal(ifp, 1, ifc);
 
 	CURVNET_RESTORE();
 }
@@ -1102,6 +1208,7 @@
 	struct ifg_list		*ifgl;
 	struct ifg_group	*ifg = NULL;
 	struct ifg_member	*ifgm;
+	int 			 new = 0;
 
 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
 	    groupname[strlen(groupname) - 1] <= '9')
@@ -1142,8 +1249,8 @@
 		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
 		ifg->ifg_refcnt = 0;
 		TAILQ_INIT(&ifg->ifg_members);
-		EVENTHANDLER_INVOKE(group_attach_event, ifg);
 		TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
+		new = 1;
 	}
 
 	ifg->ifg_refcnt++;
@@ -1157,6 +1264,8 @@
 
 	IFNET_WUNLOCK();
 
+	if (new)
+		EVENTHANDLER_INVOKE(group_attach_event, ifg);
 	EVENTHANDLER_INVOKE(group_change_event, groupname);
 
 	return (0);
@@ -1195,10 +1304,11 @@
 
 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
 		TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+		IFNET_WUNLOCK();
 		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
 		free(ifgl->ifgl_group, M_TEMP);
-	}
-	IFNET_WUNLOCK();
+	} else
+		IFNET_WUNLOCK();
 
 	free(ifgl, M_TEMP);
 
@@ -1239,11 +1349,12 @@
 
 		if (--ifgl->ifgl_group->ifg_refcnt == 0) {
 			TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+			IFNET_WUNLOCK();
 			EVENTHANDLER_INVOKE(group_detach_event,
 			    ifgl->ifgl_group);
 			free(ifgl->ifgl_group, M_TEMP);
-		}
-		IFNET_WUNLOCK();
+		} else
+			IFNET_WUNLOCK();
 
 		free(ifgl, M_TEMP);
 
@@ -1392,6 +1503,100 @@
 }
 
 /*
+ * A compatibility function returns ifnet counter values.
+ */
+uint64_t
+if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
+{
+
+	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+	switch (cnt) {
+	case IFCOUNTER_IPACKETS:
+		return (ifp->if_ipackets);
+	case IFCOUNTER_IERRORS:
+		return (ifp->if_ierrors);
+	case IFCOUNTER_OPACKETS:
+		return (ifp->if_opackets);
+	case IFCOUNTER_OERRORS:
+		return (ifp->if_oerrors);
+	case IFCOUNTER_COLLISIONS:
+		return (ifp->if_collisions);
+	case IFCOUNTER_IBYTES:
+		return (ifp->if_ibytes);
+	case IFCOUNTER_OBYTES:
+		return (ifp->if_obytes);
+	case IFCOUNTER_IMCASTS:
+		return (ifp->if_imcasts);
+	case IFCOUNTER_OMCASTS:
+		return (ifp->if_omcasts);
+	case IFCOUNTER_IQDROPS:
+		return (ifp->if_iqdrops);
+#ifdef _IFI_OQDROPS
+	case IFCOUNTER_OQDROPS:
+		return (ifp->if_oqdrops);
+#endif
+	case IFCOUNTER_NOPROTO:
+		return (ifp->if_noproto);
+	default:
+		break;
+	};
+	return (0);
+}
+
+/*
+ * Increase an ifnet counter. Usually used for counters shared
+ * between the stack and a driver, but function supports them all.
+ */
+void
+if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
+{
+
+	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+	switch (cnt) {
+	case IFCOUNTER_IPACKETS:
+		ifp->if_ipackets += inc;
+		break;
+	case IFCOUNTER_IERRORS:
+		ifp->if_ierrors += inc;
+		break;
+	case IFCOUNTER_OPACKETS:
+		ifp->if_opackets += inc;
+		break;
+	case IFCOUNTER_OERRORS:
+		ifp->if_oerrors += inc;
+		break;
+	case IFCOUNTER_COLLISIONS:
+		ifp->if_collisions += inc;
+		break;
+	case IFCOUNTER_IBYTES:
+		ifp->if_ibytes += inc;
+		break;
+	case IFCOUNTER_OBYTES:
+		ifp->if_obytes += inc;
+		break;
+	case IFCOUNTER_IMCASTS:
+		ifp->if_imcasts += inc;
+		break;
+	case IFCOUNTER_OMCASTS:
+		ifp->if_omcasts += inc;
+		break;
+	case IFCOUNTER_IQDROPS:
+		ifp->if_iqdrops += inc;
+		break;
+#ifdef _IFI_OQDROPS
+	case IFCOUNTER_OQDROPS:
+		ifp->if_oqdrops += inc;
+		break;
+#endif
+	case IFCOUNTER_NOPROTO:
+		ifp->if_noproto += inc;
+		break;
+	default:
+		break;
+	};
+}
+
+/*
  * Wrapper functions for struct ifnet address list locking macros.  These are
  * used by kernel modules to avoid encoding programming interface or binary
  * interface assumptions that may be violated when kernel-internal locking
@@ -1426,7 +1631,7 @@
 }
 
 /*
- * Reference count functions for ifaddrs.
+ * Initialization, destruction and refcounting functions for ifaddrs.
  */
 void
 ifa_init(struct ifaddr *ifa)
@@ -1434,6 +1639,7 @@
 
 	mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF);
 	refcount_init(&ifa->ifa_refcnt, 1);
+	ifa->if_data.ifi_datalen = sizeof(ifa->if_data);
 }
 
 void
@@ -1463,10 +1669,10 @@
 
 	bzero(&info, sizeof(info));
 	info.rti_ifp = V_loif;
-	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
+	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
 	info.rti_info[RTAX_DST] = ia;
 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
-	error = rtrequest1_fib(RTM_ADD, &info, &rt, 0);
+	error = rtrequest1_fib(RTM_ADD, &info, &rt, ifa->ifa_ifp->if_fib);
 
 	if (error == 0 && rt != NULL) {
 		RT_LOCK(rt);
@@ -1477,7 +1683,7 @@
 		RT_REMREF(rt);
 		RT_UNLOCK(rt);
 	} else if (error != 0)
-		log(LOG_INFO, "ifa_add_loopback_route: insertion failed\n");
+		log(LOG_DEBUG, "%s: insertion failed: %u\n", __func__, error);
 
 	return (error);
 }
@@ -1495,13 +1701,13 @@
 	null_sdl.sdl_type = ifa->ifa_ifp->if_type;
 	null_sdl.sdl_index = ifa->ifa_ifp->if_index;
 	bzero(&info, sizeof(info));
-	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
+	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
 	info.rti_info[RTAX_DST] = ia;
 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
-	error = rtrequest1_fib(RTM_DELETE, &info, NULL, 0);
+	error = rtrequest1_fib(RTM_DELETE, &info, NULL, ifa->ifa_ifp->if_fib);
 
 	if (error != 0)
-		log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+		log(LOG_DEBUG, "%s: deletion failed: %u\n", __func__, error);
 
 	return (error);
 }
@@ -1615,7 +1821,7 @@
  */
 /*ARGSUSED*/
 struct ifaddr *
-ifa_ifwithdstaddr(struct sockaddr *addr)
+ifa_ifwithdstaddr_fib(struct sockaddr *addr, int fibnum)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
@@ -1624,6 +1830,8 @@
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 			continue;
+		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
+			continue;
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
@@ -1643,12 +1851,19 @@
 	return (ifa);
 }
 
+struct ifaddr *
+ifa_ifwithdstaddr(struct sockaddr *addr)
+{
+
+	return (ifa_ifwithdstaddr_fib(addr, RT_ALL_FIBS));
+}
+
 /*
  * Find an interface on a specific network.  If many, choice
  * is most specific found.
  */
 struct ifaddr *
-ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
+ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
@@ -1668,12 +1883,14 @@
 
 	/*
 	 * Scan though each interface, looking for ones that have addresses
-	 * in this address family.  Maintain a reference on ifa_maybe once
-	 * we find one, as we release the IF_ADDR_RLOCK() that kept it stable
-	 * when we move onto the next interface.
+	 * in this address family and the requested fib.  Maintain a reference
+	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
+	 * kept it stable when we move onto the next interface.
 	 */
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
+			continue;
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			char *cp, *cp2, *cp3;
@@ -1730,11 +1947,13 @@
 				/*
 				 * If the netmask of what we just found
 				 * is more specific than what we had before
-				 * (if we had one) then remember the new one
-				 * before continuing to search
-				 * for an even better one.
+				 * (if we had one), or if the virtual status
+				 * of new prefix is better than of the old one,
+				 * then remember the new one before continuing
+				 * to search for an even better one.
 				 */
 				if (ifa_maybe == NULL ||
+				    ifa_preferred(ifa_maybe, ifa) ||
 				    rn_refines((caddr_t)ifa->ifa_netmask,
 				    (caddr_t)ifa_maybe->ifa_netmask)) {
 					if (ifa_maybe != NULL)
@@ -1755,6 +1974,13 @@
 	return (ifa);
 }
 
+struct ifaddr *
+ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
+{
+
+	return (ifa_ifwithnet_fib(addr, ignore_ptp, RT_ALL_FIBS));
+}
+
 /*
  * Find an interface address specific to an interface best matching
  * a given address.
@@ -1806,6 +2032,21 @@
 	return (ifa);
 }
 
+/*
+ * See whether new ifa is better than current one:
+ * 1) A non-virtual one is preferred over virtual.
+ * 2) A virtual in master state preferred over any other state.
+ *
+ * Used in several address selecting functions.
+ */
+int
+ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
+{
+
+	return (cur->ifa_carp && (!next->ifa_carp ||
+	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
+}
+
 #include <net/if_llatbl.h>
 
 /*
@@ -1838,7 +2079,6 @@
 /*
  * Mark an interface down and notify protocols of
  * the transition.
- * NOTE: must be called at splnet or eqivalent.
  */
 static void
 if_unroute(struct ifnet *ifp, int flag, int fam)
@@ -1862,7 +2102,6 @@
 /*
  * Mark an interface up and notify protocols of
  * the transition.
- * NOTE: must be called at splnet or eqivalent.
  */
 static void
 if_route(struct ifnet *ifp, int flag, int fam)
@@ -1941,7 +2180,7 @@
 	if (log_link_state_change)
 		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
 		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
-	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, ifp->if_link_state);
+	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
 	CURVNET_RESTORE();
 }
 
@@ -1948,12 +2187,12 @@
 /*
  * Mark an interface down and notify protocols of
  * the transition.
- * NOTE: must be called at splnet or eqivalent.
  */
 void
 if_down(struct ifnet *ifp)
 {
 
+	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
 }
 
@@ -1960,7 +2199,6 @@
 /*
  * Mark an interface up and notify protocols of
  * the transition.
- * NOTE: must be called at splnet or eqivalent.
  */
 void
 if_up(struct ifnet *ifp)
@@ -1967,6 +2205,7 @@
 {
 
 	if_route(ifp, IFF_UP, AF_UNSPEC);
+	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
 }
 
 /*
@@ -1986,7 +2225,7 @@
 #endif
 	n = ifq->ifq_head;
 	while ((m = n) != 0) {
-		n = m->m_act;
+		n = m->m_nextpkt;
 		m_freem(m);
 	}
 	ifq->ifq_head = 0;
@@ -2038,7 +2277,7 @@
 {
 	struct ifreq *ifr;
 	struct ifstat *ifs;
-	int error = 0;
+	int error = 0, do_ifup = 0;
 	int new_flags, temp_flags;
 	size_t namelen, onamelen;
 	size_t descrlen;
@@ -2163,14 +2402,10 @@
 			/* Smart drivers twiddle their own routes */
 		} else if (ifp->if_flags & IFF_UP &&
 		    (new_flags & IFF_UP) == 0) {
-			int s = splimp();
 			if_down(ifp);
-			splx(s);
 		} else if (new_flags & IFF_UP &&
 		    (ifp->if_flags & IFF_UP) == 0) {
-			int s = splimp();
-			if_up(ifp);
-			splx(s);
+			do_ifup = 1;
 		}
 		/* See if permanently promiscuous mode bit is about to flip */
 		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
@@ -2178,9 +2413,11 @@
 				ifp->if_flags |= IFF_PROMISC;
 			else if (ifp->if_pcount == 0)
 				ifp->if_flags &= ~IFF_PROMISC;
-			log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
-			    ifp->if_xname,
-			    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
+			if (log_promisc_mode_change)
+                                log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
+                                    ifp->if_xname,
+                                    ((new_flags & IFF_PPROMISC) ?
+                                     "enabled" : "disabled"));
 		}
 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
 			(new_flags &~ IFF_CANTCHANGE);
@@ -2187,6 +2424,8 @@
 		if (ifp->if_ioctl) {
 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
 		}
+		if (do_ifup)
+			if_up(ifp);
 		getmicrotime(&ifp->if_lastchange);
 		break;
 
@@ -2218,6 +2457,11 @@
 			return (error);
 		if (new_name[0] == '\0')
 			return (EINVAL);
+		if (new_name[IFNAMSIZ-1] != '\0') {
+			new_name[IFNAMSIZ-1] = '\0';
+			if (strlen(new_name) == IFNAMSIZ-1)
+				return (EINVAL);
+		}
 		if (ifunit(new_name) != NULL)
 			return (EEXIST);
 
@@ -2388,7 +2632,10 @@
 	case SIOCGIFPDSTADDR:
 	case SIOCGLIFPHYADDR:
 	case SIOCGIFMEDIA:
+	case SIOCGIFXMEDIA:
 	case SIOCGIFGENERIC:
+	case SIOCGIFRSSKEY:
+	case SIOCGIFRSSHASH:
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
@@ -2403,6 +2650,10 @@
 		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
 		break;
 
+	case SIOCGHWADDR:
+		error = if_gethwaddr(ifp, ifr);
+		break;
+
 	case SIOCAIFGROUP:
 	{
 		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
@@ -2523,6 +2774,16 @@
 		error = if_getgroupmembers((struct ifgroupreq *)data);
 		CURVNET_RESTORE();
 		return (error);
+#if defined(INET) || defined(INET6)
+	case SIOCSVH:
+	case SIOCGVH:
+		if (carp_ioctl_p == NULL)
+			error = EPROTONOSUPPORT;
+		else
+			error = (*carp_ioctl_p)(ifr, cmd, td);
+		CURVNET_RESTORE();
+		return (error);
+#endif
 	}
 
 	ifp = ifunit_ref(ifr->ifr_name);
@@ -2544,11 +2805,23 @@
 		CURVNET_RESTORE();
 		return (EOPNOTSUPP);
 	}
+
+	/*
+	 * Pass the request on to the socket control method, and if the
+	 * latter returns EOPNOTSUPP, directly to the interface.
+	 *
+	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
+	 * trust SIOCSIFADDR et al to come from an already privileged
+	 * layer, and do not perform any credentials checks or input
+	 * validation.
+	 */
 #ifndef COMPAT_43
 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
 								 data,
 								 ifp, td));
-	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
+	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
+	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 #else
 	{
@@ -2592,7 +2865,9 @@
 								   data,
 								   ifp, td));
 		if (error == EOPNOTSUPP && ifp != NULL &&
-		    ifp->if_ioctl != NULL)
+		    ifp->if_ioctl != NULL &&
+		    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+		    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
 			error = (*ifp->if_ioctl)(ifp, cmd, data);
 		switch (ocmd) {
 
@@ -2608,11 +2883,8 @@
 
 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
 #ifdef INET6
-		if (ifp->if_flags & IFF_UP) {
-			int s = splimp();
+		if (ifp->if_flags & IFF_UP)
 			in6_if_up(ifp);
-			splx(s);
-		}
 #endif
 	}
 	if_rele(ifp);
@@ -2708,7 +2980,8 @@
 	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
 			   &ifp->if_pcount, pswitch);
 	/* If promiscuous mode status has changed, log a message */
-	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
+	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
+            log_promisc_mode_change)
 		log(LOG_INFO, "%s: promiscuous mode %s\n",
 		    ifp->if_xname,
 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
@@ -2753,10 +3026,10 @@
 		int addrs;
 
 		/*
-		 * Zero the ifr_name buffer to make sure we don't
-		 * disclose the contents of the stack.
+		 * Zero the ifr to make sure we don't disclose the contents
+		 * of the stack.
 		 */
-		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
+		memset(&ifr, 0, sizeof(ifr));
 
 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
 		    >= sizeof(ifr.ifr_name)) {
@@ -2784,7 +3057,13 @@
 			} else
 #endif
 			if (sa->sa_len <= sizeof(*sa)) {
-				ifr.ifr_addr = *sa;
+				if (sa->sa_len < sizeof(*sa)) {
+					memset(&ifr.ifr_ifru.ifru_addr, 0,
+					    sizeof(ifr.ifr_ifru.ifru_addr));
+					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
+					    sa->sa_len);
+				} else
+					ifr.ifr_ifru.ifru_addr = *sa;
 				sbuf_bcat(sb, &ifr, sizeof(ifr));
 				max_len += sizeof(ifr);
 			} else {
@@ -2800,7 +3079,6 @@
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		if (addrs == 0) {
-			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
 			sbuf_bcat(sb, &ifr, sizeof(ifr));
 			max_len += sizeof(ifr);
 
@@ -3320,6 +3598,29 @@
 }
 
 /*
+ * Get the link layer address that was read from the hardware at attach.
+ *
+ * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
+ * their component interfaces as IFT_IEEE8023ADLAG.
+ */
+int
+if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
+{
+
+	if (ifp->if_hw_addr == NULL)
+		return (ENODEV);
+
+	switch (ifp->if_type) {
+	case IFT_ETHER:
+	case IFT_IEEE8023ADLAG:
+		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
+		return (0);
+	default:
+		return (ENODEV);
+	}
+}
+
+/*
  * The name argument must be a pointer to storage which will last as
  * long as the interface does.  For physical devices, the result of
  * device_get_name(dev) is a good choice and for pseudo-devices a
@@ -3369,6 +3670,13 @@
 	return (error);
 }
 
+static void
+if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
+{
+
+	m_freem(m);
+}
+
 int
 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
 {

Modified: trunk/sys/net/if.h
===================================================================
--- trunk/sys/net/if.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if.h 238247 2012-07-08 14:21:36Z bz $
+ * $FreeBSD: stable/10/sys/net/if.h 324462 2017-10-10 02:35:04Z sephe $
  */
 
 #ifndef _NET_IF_H_
@@ -86,8 +86,8 @@
 	u_char	ifi_addrlen;		/* media address length */
 	u_char	ifi_hdrlen;		/* media header length */
 	u_char	ifi_link_state;		/* current link state */
-	u_char	ifi_spare_char1;	/* spare byte */
-	u_char	ifi_spare_char2;	/* spare byte */
+	u_char	ifi_vhid;		/* carp vhid */
+	u_char	ifi_baudrate_pf;	/* baudrate power factor */
 	u_char	ifi_datalen;		/* length of this data struct */
 	u_long	ifi_mtu;		/* maximum transmission unit */
 	u_long	ifi_metric;		/* routing metric (external only) */
@@ -104,9 +104,12 @@
 	u_long	ifi_omcasts;		/* packets sent via multicast */
 	u_long	ifi_iqdrops;		/* dropped on input, this interface */
 	u_long	ifi_noproto;		/* destined for unsupported protocol */
-	u_long	ifi_hwassist;		/* HW offload capabilities, see IFCAP */
+	uint64_t ifi_hwassist;		/* HW offload capabilities, see IFCAP */
 	time_t	ifi_epoch;		/* uptime at attach or stat reset */
 	struct	timeval ifi_lastchange;	/* time of last administrative change */
+#ifdef _IFI_OQDROPS
+	u_long	ifi_oqdrops;		/* dropped on output */
+#endif
 };
 
 /*-
@@ -154,7 +157,6 @@
 #define	IFF_STATICARP	0x80000		/* (n) static ARP */
 #define	IFF_DYING	0x200000	/* (n) interface is winding down */
 #define	IFF_RENAMING	0x400000	/* (n) interface is being renamed */
-
 /*
  * Old names for driver flags so that user space tools can continue to use
  * the old (portable) names.
@@ -181,7 +183,7 @@
  * Some convenience macros used for setting ifi_baudrate.
  * XXX 1000 vs. 1024? --thorpej at netbsd.org
  */
-#define	IF_Kbps(x)	((x) * 1000)		/* kilobits/sec. */
+#define	IF_Kbps(x)	((uintmax_t)(x) * 1000)	/* kilobits/sec. */
 #define	IF_Mbps(x)	(IF_Kbps((x) * 1000))	/* megabits/sec. */
 #define	IF_Gbps(x)	(IF_Mbps((x) * 1000))	/* gigabits/sec. */
 
@@ -233,6 +235,7 @@
 #define	IFCAP_NETMAP		0x100000 /* netmap mode supported/enabled */
 #define	IFCAP_RXCSUM_IPV6	0x200000  /* can offload checksum on IPv6 RX */
 #define	IFCAP_TXCSUM_IPV6	0x400000  /* can offload checksum on IPv6 TX */
+#define	IFCAP_HWSTATS		0x800000 /* manages counters internally */
 
 #define IFCAP_HWCSUM_IPV6	(IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
 
@@ -284,6 +287,9 @@
 	u_short	ifm_len;	/* length of if_msghdrl incl. if_data */
 	u_short	ifm_data_off;	/* offset of if_data from beginning */
 	struct	if_data ifm_data;/* statistics and other data about if */
+#ifdef _IN_NET_RTSOCK_C
+	u_long	ifi_oqdrops;
+#endif
 };
 
 /*
@@ -416,8 +422,17 @@
 	struct	sockaddr ifra_addr;
 	struct	sockaddr ifra_broadaddr;
 	struct	sockaddr ifra_mask;
+	int	ifra_vhid;
 };
 
+/* Compat with pre-10.x */
+struct oifaliasreq {
+	char	ifra_name[IFNAMSIZ];
+	struct	sockaddr ifra_addr;
+	struct	sockaddr ifra_broadaddr;
+	struct	sockaddr ifra_mask;
+};
+
 struct ifmediareq {
 	char	ifm_name[IFNAMSIZ];	/* if name, e.g. "en0" */
 	int	ifm_current;		/* current media options */
@@ -507,6 +522,55 @@
 	struct	sockaddr_storage dstaddr; /* out */
 };
 
+/*
+ * Structure used to request i2c data
+ * from interface transceivers.
+ */
+struct ifi2creq {
+	uint8_t dev_addr;	/* i2c address (0xA0, 0xA2) */
+	uint8_t offset;		/* read offset */
+	uint8_t len;		/* read length */
+	uint8_t spare0;
+	uint32_t spare1;
+	uint8_t data[8];	/* read buffer */
+}; 
+
+/*
+ * RSS hash.
+ */
+
+#define	RSS_FUNC_NONE		0		/* RSS disabled */
+#define	RSS_FUNC_PRIVATE	1		/* non-standard */
+#define	RSS_FUNC_TOEPLITZ	2
+
+#define	RSS_TYPE_IPV4		0x00000001
+#define	RSS_TYPE_TCP_IPV4	0x00000002
+#define	RSS_TYPE_IPV6		0x00000004
+#define	RSS_TYPE_IPV6_EX	0x00000008
+#define	RSS_TYPE_TCP_IPV6	0x00000010
+#define	RSS_TYPE_TCP_IPV6_EX	0x00000020
+#define	RSS_TYPE_UDP_IPV4	0x00000040
+#define	RSS_TYPE_UDP_IPV6	0x00000080
+#define	RSS_TYPE_UDP_IPV6_EX	0x00000100
+
+#define	RSS_KEYLEN		128
+
+struct ifrsskey {
+	char		ifrk_name[IFNAMSIZ];	/* if name, e.g. "en0" */
+	uint8_t		ifrk_func;		/* RSS_FUNC_ */
+	uint8_t		ifrk_spare0;
+	uint16_t	ifrk_keylen;
+	uint8_t		ifrk_key[RSS_KEYLEN];
+};
+
+struct ifrsshash {
+	char		ifrh_name[IFNAMSIZ];	/* if name, e.g. "en0" */
+	uint8_t		ifrh_func;		/* RSS_FUNC_ */
+	uint8_t		ifrh_spare0;
+	uint16_t	ifrh_spare1;
+	uint32_t	ifrh_types;		/* RSS_TYPE_ */
+};
+
 #endif /* __BSD_VISIBLE */
 
 #ifdef _KERNEL

Modified: trunk/sys/net/if_arc.h
===================================================================
--- trunk/sys/net/if_arc.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_arc.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*	$NetBSD: if_arc.h,v 1.13 1999/11/19 20:41:19 thorpej Exp $	*/
-/* $FreeBSD: stable/9/sys/net/if_arc.h 194581 2009-06-21 10:29:31Z rdivacky $ */
+/* $FreeBSD: stable/10/sys/net/if_arc.h 249925 2013-04-26 12:50:32Z glebius $ */
 
 /*-
  * Copyright (c) 1982, 1986, 1993
@@ -134,7 +134,7 @@
 int	arc_isphds(u_int8_t);
 void	arc_input(struct ifnet *, struct mbuf *);
 int	arc_output(struct ifnet *, struct mbuf *,
-	    struct sockaddr *, struct route *);
+	    const struct sockaddr *, struct route *);
 int	arc_ioctl(struct ifnet *, u_long, caddr_t);
 
 void		arc_frag_init(struct ifnet *);

Modified: trunk/sys/net/if_arcsubr.c
===================================================================
--- trunk/sys/net/if_arcsubr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_arcsubr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*	$NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $	*/
-/*	$FreeBSD: stable/9/sys/net/if_arcsubr.c 249132 2013-04-05 08:22:11Z mav $ */
+/*	$FreeBSD: stable/10/sys/net/if_arcsubr.c 332160 2018-04-07 00:04:28Z brooks $ */
 
 /*-
  * Copyright (c) 1994, 1995 Ignatios Souvatzis
@@ -93,8 +93,8 @@
 #define ARC_LLADDR(ifp)	(*(u_int8_t *)IF_LLADDR(ifp))
 
 #define senderr(e) { error = (e); goto bad;}
-#define SIN(s)	((struct sockaddr_in *)s)
-#define SIPX(s)	((struct sockaddr_ipx *)s)
+#define SIN(s)	((const struct sockaddr_in *)(s))
+#define SIPX(s)	((const struct sockaddr_ipx *)(s))
 
 /*
  * ARCnet output routine.
@@ -102,7 +102,7 @@
  * Assumes that ifp is actually pointer to arccom structure.
  */
 int
-arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct arc_header	*ah;
@@ -187,8 +187,11 @@
 #endif
 
 	case AF_UNSPEC:
+	    {
+		const struct arc_header *ah;
+
 		loop_copy = -1;
-		ah = (struct arc_header *)dst->sa_data;
+		ah = (const struct arc_header *)dst->sa_data;
 		adst = ah->arc_dhost;
 		atype = ah->arc_type;
 
@@ -208,7 +211,7 @@
 #endif
 		}
 		break;
-
+	    }
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
@@ -215,7 +218,7 @@
 	}
 
 	isphds = arc_isphds(atype);
-	M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_DONTWAIT);
+	M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT);
 	if (m == 0)
 		senderr(ENOBUFS);
 	ah = mtod(m, struct arc_header *);
@@ -295,13 +298,13 @@
 	/* split out next fragment and return it */
 	if (ac->sflag < ac->fsflag) {
 		/* we CAN'T have short packets here */
-		ac->curr_frag = m_split(m, ARC_MAX_DATA, M_DONTWAIT);
+		ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT);
 		if (ac->curr_frag == 0) {
 			m_freem(m);
 			return 0;
 		}
 
-		M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
+		M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
 		if (m == 0) {
 			m_freem(ac->curr_frag);
 			ac->curr_frag = 0;
@@ -320,7 +323,7 @@
 	    ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) {
 		ac->curr_frag = 0;
 
-		M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_DONTWAIT);
+		M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT);
 		if (m == 0)
 			return 0;
 
@@ -333,7 +336,7 @@
 	} else {
 		ac->curr_frag = 0;
 
-		M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
+		M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
 		if (m == 0)
 			return 0;
 
@@ -639,11 +642,7 @@
 	ifp->if_resolvemulti = arc_resolvemulti;
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = 2500000;
-#if __FreeBSD_version < 500000
-	ifa = ifnet_addrs[ifp->if_index - 1];
-#else
 	ifa = ifp->if_addr;
-#endif
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ARCNET;
@@ -717,12 +716,7 @@
 		break;
 
 	case SIOCGIFADDR:
-		{
-			struct sockaddr *sa;
-
-			sa = (struct sockaddr *) &ifr->ifr_data;
-			*(u_int8_t *)sa->sa_data = ARC_LLADDR(ifp);
-		}
+		ifr->ifr_addr.sa_data[0] = ARC_LLADDR(ifp);
 		break;
 
 	case SIOCADDMULTI:

Modified: trunk/sys/net/if_arp.h
===================================================================
--- trunk/sys/net/if_arp.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_arp.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_arp.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_arp.h 219819 2011-03-21 09:40:01Z jeff $
+ * $FreeBSD: stable/10/sys/net/if_arp.h 253084 2013-07-09 09:50:15Z ae $
  */
 
 #ifndef _NET_IF_ARP_H_
@@ -114,27 +114,35 @@
 
 struct arpstat {
 	/* Normal things that happen: */
-	u_long txrequests;	/* # of ARP requests sent by this host. */
-	u_long txreplies;	/* # of ARP replies sent by this host. */
-	u_long rxrequests;	/* # of ARP requests received by this host. */
-	u_long rxreplies;	/* # of ARP replies received by this host. */
-	u_long received;	/* # of ARP packets received by this host. */
+	uint64_t txrequests;	/* # of ARP requests sent by this host. */
+	uint64_t txreplies;	/* # of ARP replies sent by this host. */
+	uint64_t rxrequests;	/* # of ARP requests received by this host. */
+	uint64_t rxreplies;	/* # of ARP replies received by this host. */
+	uint64_t received;	/* # of ARP packets received by this host. */
 
-	u_long arp_spares[4];	/* For either the upper or lower half. */
+	uint64_t arp_spares[4];	/* For either the upper or lower half. */
 	/* Abnormal event and error  counting: */
-	u_long dropped;		/* # of packets dropped waiting for a reply. */
-	u_long timeouts;	/* # of times with entries removed */
+	uint64_t dropped;	/* # of packets dropped waiting for a reply. */
+	uint64_t timeouts;	/* # of times with entries removed */
 				/* due to timeout. */
-	u_long dupips;		/* # of duplicate IPs detected. */
+	uint64_t dupips;	/* # of duplicate IPs detected. */
 };
 
+#ifdef _KERNEL
+#include <sys/counter.h>
+#include <net/vnet.h>
+
+VNET_PCPUSTAT_DECLARE(struct arpstat, arpstat);
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
-#define	ARPSTAT_ADD(name, val)	V_arpstat.name += (val)
-#define	ARPSTAT_SUB(name, val)	V_arpstat.name -= (val)
+#define	ARPSTAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct arpstat, arpstat, name, (val))
+#define	ARPSTAT_SUB(name, val)	ARPSTAT_ADD(name, -(val))
 #define	ARPSTAT_INC(name)	ARPSTAT_ADD(name, 1)
 #define	ARPSTAT_DEC(name)	ARPSTAT_SUB(name, 1)
 
+#endif /* _KERNEL */
+
 #endif /* !_NET_IF_ARP_H_ */

Modified: trunk/sys/net/if_atm.h
===================================================================
--- trunk/sys/net/if_atm.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_atm.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*      $NetBSD: if_atm.h,v 1.7 1996/11/09 23:02:27 chuck Exp $       */
-/* $FreeBSD: stable/9/sys/net/if_atm.h 191148 2009-04-16 20:30:28Z kmacy $ */
+/* $FreeBSD: stable/10/sys/net/if_atm.h 249925 2013-04-26 12:50:32Z glebius $ */
 
 /*-
  *
@@ -293,7 +293,7 @@
 void	atm_ifdetach(struct ifnet *);
 void	atm_input(struct ifnet *, struct atm_pseudohdr *,
 	    struct mbuf *, void *);
-int	atm_output(struct ifnet *, struct mbuf *, struct sockaddr *, 
+int	atm_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 
 	    struct route *);
 struct atmio_vcctable *atm_getvccs(struct atmio_vcc **, u_int, u_int,
 	    struct mtx *, int);

Modified: trunk/sys/net/if_atmsubr.c
===================================================================
--- trunk/sys/net/if_atmsubr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_atmsubr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_atmsubr.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_atmsubr.c 249925 2013-04-26 12:50:32Z glebius $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -122,7 +122,7 @@
  *		ro->ro_rt must also be NULL.
  */
 int
-atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+atm_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
     struct route *ro)
 {
 	u_int16_t etype = 0;			/* if using LLC/SNAP */
@@ -130,7 +130,7 @@
 	struct atm_pseudohdr atmdst, *ad;
 	struct mbuf *m = m0;
 	struct atmllc *atmllc;
-	struct atmllc *llc_hdr = NULL;
+	const struct atmllc *llc_hdr = NULL;
 	u_int32_t atm_flags;
 
 #ifdef MAC
@@ -174,7 +174,7 @@
 			 * (atm pseudo header (4) + LLC/SNAP (8))
 			 */
 			bcopy(dst->sa_data, &atmdst, sizeof(atmdst));
-			llc_hdr = (struct atmllc *)(dst->sa_data +
+			llc_hdr = (const struct atmllc *)(dst->sa_data +
 			    sizeof(atmdst));
 			break;
 			
@@ -191,7 +191,7 @@
 		atm_flags = ATM_PH_FLAGS(&atmdst);
 		if (atm_flags & ATM_PH_LLCSNAP)
 			sz += 8;	/* sizeof snap == 8 */
-		M_PREPEND(m, sz, M_DONTWAIT);
+		M_PREPEND(m, sz, M_NOWAIT);
 		if (m == 0)
 			senderr(ENOBUFS);
 		ad = mtod(m, struct atm_pseudohdr *);

Modified: trunk/sys/net/if_bridge.c
===================================================================
--- trunk/sys/net/if_bridge.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_bridge.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -76,7 +76,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_bridge.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_bridge.c 324116 2017-09-30 10:16:15Z kp $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -101,7 +101,6 @@
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
-#include <sys/rwlock.h>
 
 #include <net/bpf.h>
 #include <net/if.h>
@@ -120,6 +119,7 @@
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
+#include <netinet6/in6_ifattach.h>
 #endif
 #if defined(INET) || defined(INET6)
 #include <netinet/ip_carp.h>
@@ -132,8 +132,6 @@
 #include <net/if_vlan_var.h>
 
 #include <net/route.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
 
 /*
  * Size of the route hash table.  Must be a power of two.
@@ -168,7 +166,8 @@
 /*
  * List of capabilities to possibly mask on the member interface.
  */
-#define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+#define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
+					 IFCAP_TXCSUM_IPV6)
 
 /*
  * List of capabilities to strip
@@ -246,11 +245,12 @@
 static void	bridge_init(void *);
 static void	bridge_dummynet(struct mbuf *, struct ifnet *);
 static void	bridge_stop(struct ifnet *, int);
-static void	bridge_start(struct ifnet *);
+static int	bridge_transmit(struct ifnet *, struct mbuf *);
+static void	bridge_qflush(struct ifnet *);
 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
 static int	bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *);
-static void	bridge_enqueue(struct bridge_softc *, struct ifnet *,
+static int	bridge_enqueue(struct bridge_softc *, struct ifnet *,
 		    struct mbuf *);
 static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
 
@@ -273,7 +273,7 @@
 static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
 		    uint16_t);
 
-static int	bridge_rtable_init(struct bridge_softc *);
+static void	bridge_rtable_init(struct bridge_softc *);
 static void	bridge_rtable_fini(struct bridge_softc *);
 
 static int	bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
@@ -332,7 +332,7 @@
 #ifdef INET6
 static int	bridge_ip6_checkbasic(struct mbuf **mp);
 #endif /* INET6 */
-static int	bridge_fragment(struct ifnet *, struct mbuf *,
+static int	bridge_fragment(struct ifnet *, struct mbuf **mp,
 		    struct ether_header *, int, struct llc *);
 static void	bridge_linkstate(struct ifnet *ifp);
 static void	bridge_linkcheck(struct bridge_softc *sc);
@@ -384,6 +384,12 @@
     &bridge_inherit_mac, 0,
     "Inherit MAC address from the first bridge member");
 
+static VNET_DEFINE(int, allow_llz_overlap) = 0;
+#define	V_allow_llz_overlap	VNET(allow_llz_overlap)
+SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW,
+    &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope "
+    "zones of a bridge interface and the member interfaces");
+
 struct bridge_control {
 	int	(*bc_func)(struct bridge_softc *, void *);
 	int	bc_argsize;
@@ -484,7 +490,8 @@
 
 LIST_HEAD(, bridge_softc) bridge_list;
 
-IFC_SIMPLE_DECLARE(bridge, 0);
+static struct if_clone *bridge_cloner;
+static const char bridge_name[] = "bridge";
 
 static int
 bridge_modevent(module_t mod, int type, void *data)
@@ -493,7 +500,8 @@
 	switch (type) {
 	case MOD_LOAD:
 		mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF);
-		if_clone_attach(&bridge_cloner);
+		bridge_cloner = if_clone_simple(bridge_name,
+		    bridge_clone_create, bridge_clone_destroy, 0);
 		bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
 		    sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
@@ -509,7 +517,7 @@
 	case MOD_UNLOAD:
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    bridge_detach_cookie);
-		if_clone_detach(&bridge_cloner);
+		if_clone_detach(bridge_cloner);
 		uma_zdestroy(bridge_rtnode_zone);
 		bridge_input_p = NULL;
 		bridge_output_p = NULL;
@@ -530,10 +538,11 @@
 };
 
 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_bridge, 1);
 MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
 
 /*
- * handler for net.link.bridge.pfil_ipfw
+ * handler for net.link.bridge.ipfw
  */
 static int
 sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
@@ -598,15 +607,13 @@
 	LIST_INIT(&sc->sc_spanlist);
 
 	ifp->if_softc = sc;
-	if_initname(ifp, ifc->ifc_name, unit);
+	if_initname(ifp, bridge_name, unit);
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = bridge_ioctl;
-	ifp->if_start = bridge_start;
+	ifp->if_transmit = bridge_transmit;
+	ifp->if_qflush = bridge_qflush;
 	ifp->if_init = bridge_init;
 	ifp->if_type = IFT_BRIDGE;
-	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
-	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
-	IFQ_SET_READY(&ifp->if_snd);
 
 	/*
 	 * Generate an ethernet address with a locally administered address.
@@ -618,7 +625,7 @@
 	 */
 	fb = 0;
 	getcredhostid(curthread->td_ucred, &hostid);
-	for (retry = 1; retry != 0;) {
+	do {
 		if (fb || hostid == 0) {
 			arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
 			sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
@@ -638,11 +645,13 @@
 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
 			bifp = sc2->sc_ifp;
 			if (memcmp(sc->sc_defaddr,
-			    IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0)
+			    IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
 				retry = 1;
+				break;
+			}
 		}
 		mtx_unlock(&bridge_list_mtx);
-	}
+	} while (retry == 1);
 
 	bstp_attach(&sc->sc_stp, &bridge_ops);
 	ether_ifattach(ifp, sc->sc_defaddr);
@@ -690,7 +699,7 @@
 
 	bstp_detach(&sc->sc_stp);
 	ether_ifdetach(ifp);
-	if_free_type(ifp, IFT_ETHER);
+	if_free(ifp);
 
 	/* Tear down the routing table. */
 	bridge_rtable_fini(sc);
@@ -853,6 +862,7 @@
 		mask &= bif->bif_savedcaps;
 	}
 
+	BRIDGE_XLOCK(sc);
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		enabled = bif->bif_ifp->if_capenable;
 		enabled &= ~BRIDGE_IFCAPS_STRIP;
@@ -859,8 +869,11 @@
 		/* strip off mask bits and enable them again if allowed */
 		enabled &= ~BRIDGE_IFCAPS_MASK;
 		enabled |= mask;
+		BRIDGE_UNLOCK(sc);
 		bridge_set_ifcap(sc, bif, enabled);
+		BRIDGE_LOCK(sc);
 	}
+	BRIDGE_XDROP(sc);
 
 }
 
@@ -871,6 +884,8 @@
 	struct ifreq ifr;
 	int error;
 
+	BRIDGE_UNLOCK_ASSERT(sc);
+
 	bzero(&ifr, sizeof(ifr));
 	ifr.ifr_reqcap = set;
 
@@ -978,9 +993,12 @@
 		case IFT_ETHER:
 		case IFT_L2VLAN:
 			/*
-			 * Take the interface out of promiscuous mode.
+			 * Take the interface out of promiscuous mode, but only
+			 * if it was promiscuous in the first place. It might
+			 * not be if we're in the bridge_ioctl_add() error path.
 			 */
-			(void) ifpromisc(ifs, 0);
+			if (ifs->if_flags & IFF_PROMISC)
+				(void) ifpromisc(ifs, 0);
 			break;
 
 		case IFT_GIF:
@@ -1042,14 +1060,6 @@
 	if (ifs->if_bridge != NULL)
 		return (EBUSY);
 
-	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
-	if (bif == NULL)
-		return (ENOMEM);
-
-	bif->bif_ifp = ifs;
-	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
-	bif->bif_savedcaps = ifs->if_capenable;
-
 	switch (ifs->if_type) {
 	case IFT_ETHER:
 	case IFT_L2VLAN:
@@ -1057,10 +1067,53 @@
 		/* permitted interface types */
 		break;
 	default:
-		error = EINVAL;
-		goto out;
+		return (EINVAL);
 	}
 
+#ifdef INET6
+	/*
+	 * Two valid inet6 addresses with link-local scope must not be
+	 * on the parent interface and the member interfaces at the
+	 * same time.  This restriction is needed to prevent violation
+	 * of link-local scope zone.  Attempts to add a member
+	 * interface which has inet6 addresses when the parent has
+	 * inet6 triggers removal of all inet6 addresses on the member
+	 * interface.
+	 */
+
+	/* Check if the parent interface has a link-local scope addr. */
+	if (V_allow_llz_overlap == 0 &&
+	    in6ifa_llaonifp(sc->sc_ifp) != NULL) {
+		/*
+		 * If any, remove all inet6 addresses from the member
+		 * interfaces.
+		 */
+		BRIDGE_XLOCK(sc);
+		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ 			if (in6ifa_llaonifp(bif->bif_ifp)) {
+				BRIDGE_UNLOCK(sc);
+				in6_ifdetach(bif->bif_ifp);
+				BRIDGE_LOCK(sc);
+				if_printf(sc->sc_ifp,
+				    "IPv6 addresses on %s have been removed "
+				    "before adding it as a member to prevent "
+				    "IPv6 address scope violation.\n",
+				    bif->bif_ifp->if_xname);
+			}
+		}
+		BRIDGE_XDROP(sc);
+		if (in6ifa_llaonifp(ifs)) {
+			BRIDGE_UNLOCK(sc);
+			in6_ifdetach(ifs);
+			BRIDGE_LOCK(sc);
+			if_printf(sc->sc_ifp,
+			    "IPv6 addresses on %s have been removed "
+			    "before adding it as a member to prevent "
+			    "IPv6 address scope violation.\n",
+			    ifs->if_xname);
+		}
+	}
+#endif
 	/* Allow the first Ethernet member to define the MTU */
 	if (LIST_EMPTY(&sc->sc_iflist))
 		sc->sc_ifp->if_mtu = ifs->if_mtu;
@@ -1067,10 +1120,17 @@
 	else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
 		if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n",
 		    ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu);
-		error = EINVAL;
-		goto out;
+		return (EINVAL);
 	}
 
+	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (bif == NULL)
+		return (ENOMEM);
+
+	bif->bif_ifp = ifs;
+	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+	bif->bif_savedcaps = ifs->if_capenable;
+
 	/*
 	 * Assign the interface's MAC address to the bridge if it's the first
 	 * member and the MAC address of the bridge has not been changed from
@@ -1105,13 +1165,9 @@
 			BRIDGE_LOCK(sc);
 			break;
 	}
+
 	if (error)
 		bridge_delete_member(sc, bif, 0);
-out:
-	if (error) {
-		if (bif != NULL)
-			free(bif, M_DEVBUF);
-	}
 	return (error);
 }
 
@@ -1784,7 +1840,7 @@
  *	Enqueue a packet on a bridge member interface.
  *
  */
-static void
+static int
 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
 {
 	int len, err = 0;
@@ -1791,13 +1847,12 @@
 	short mflags;
 	struct mbuf *m0;
 
-	len = m->m_pkthdr.len;
-	mflags = m->m_flags;
-
 	/* We may be sending a fragment so traverse the mbuf */
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = NULL;
+		len = m->m_pkthdr.len;
+		mflags = m->m_flags;
 
 		/*
 		 * If underlying interface can not do VLAN tag insertion itself
@@ -1815,18 +1870,20 @@
 			m->m_flags &= ~M_VLANTAG;
 		}
 
+		M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */
 		if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
 			m_freem(m0);
+			sc->sc_ifp->if_oerrors++;
 			break;
 		}
-	}
 
-	if (err == 0) {
 		sc->sc_ifp->if_opackets++;
 		sc->sc_ifp->if_obytes += len;
 		if (mflags & M_MCAST)
 			sc->sc_ifp->if_omcasts++;
 	}
+
+	return (err);
 }
 
 /*
@@ -1952,7 +2009,7 @@
 				used = 1;
 				mc = m;
 			} else {
-				mc = m_copypacket(m, M_DONTWAIT);
+				mc = m_copypacket(m, M_NOWAIT);
 				if (mc == NULL) {
 					sc->sc_ifp->if_oerrors++;
 					continue;
@@ -1985,47 +2042,45 @@
 }
 
 /*
- * bridge_start:
+ * bridge_transmit:
  *
- *	Start output on a bridge.
+ *	Do output on a bridge.
  *
  */
-static void
-bridge_start(struct ifnet *ifp)
+static int
+bridge_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct bridge_softc *sc;
-	struct mbuf *m;
 	struct ether_header *eh;
 	struct ifnet *dst_if;
+	int error = 0;
 
 	sc = ifp->if_softc;
 
-	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-	for (;;) {
-		IFQ_DEQUEUE(&ifp->if_snd, m);
-		if (m == 0)
-			break;
-		ETHER_BPF_MTAP(ifp, m);
+	ETHER_BPF_MTAP(ifp, m);
 
-		eh = mtod(m, struct ether_header *);
-		dst_if = NULL;
+	eh = mtod(m, struct ether_header *);
 
-		BRIDGE_LOCK(sc);
-		if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
-			dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
-		}
+	BRIDGE_LOCK(sc);
+	if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) &&
+	    (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) {
+		BRIDGE_UNLOCK(sc);
+		error = bridge_enqueue(sc, dst_if, m);
+	} else
+		bridge_broadcast(sc, ifp, m, 0);
 
-		if (dst_if == NULL)
-			bridge_broadcast(sc, ifp, m, 0);
-		else {
-			BRIDGE_UNLOCK(sc);
-			bridge_enqueue(sc, dst_if, m);
-		}
-	}
-	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+	return (error);
 }
 
 /*
+ * The ifp->if_qflush entry point for if_bridge(4) is no-op.
+ */
+static void
+bridge_qflush(struct ifnet *ifp __unused)
+{
+}
+
+/*
  * bridge_forward:
  *
  *	The forwarding function of the bridge.
@@ -2243,7 +2298,7 @@
 		 * for bridge processing; return the original packet for
 		 * local processing.
 		 */
-		mc = m_dup(m, M_DONTWAIT);
+		mc = m_dup(m, M_NOWAIT);
 		if (mc == NULL) {
 			BRIDGE_UNLOCK(sc);
 			return (m);
@@ -2260,7 +2315,7 @@
 		 */
 		KASSERT(bifp->if_bridge == NULL,
 		    ("loop created in bridge_input"));
-		mc2 = m_dup(m, M_DONTWAIT);
+		mc2 = m_dup(m, M_NOWAIT);
 		if (mc2 != NULL) {
 			/* Keep the layer3 header aligned */
 			int i = min(mc2->m_pkthdr.len, max_protohdr);
@@ -2310,6 +2365,7 @@
 		if ((iface)->if_type == IFT_BRIDGE) {			\
 			ETHER_BPF_MTAP(iface, m);			\
 			iface->if_ipackets++;				\
+			iface->if_ibytes += m->m_pkthdr.len;		\
 			/* Filter on the physical interface. */		\
 			if (pfil_local_phys &&				\
 			    (PFIL_HOOKED(&V_inet_pfil_hook)		\
@@ -2319,6 +2375,7 @@
 					BRIDGE_UNLOCK(sc);		\
 					return (NULL);			\
 				}					\
+				eh = mtod(m, struct ether_header *);	\
 			}						\
 		}							\
 		if (bif->bif_flags & IFBIF_LEARNING) {			\
@@ -2436,7 +2493,7 @@
 			mc = m;
 			used = 1;
 		} else {
-			mc = m_dup(m, M_DONTWAIT);
+			mc = m_dup(m, M_NOWAIT);
 			if (mc == NULL) {
 				sc->sc_ifp->if_oerrors++;
 				continue;
@@ -2499,7 +2556,7 @@
 		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			continue;
 
-		mc = m_copypacket(m, M_DONTWAIT);
+		mc = m_copypacket(m, M_NOWAIT);
 		if (mc == NULL) {
 			sc->sc_ifp->if_oerrors++;
 			continue;
@@ -2744,24 +2801,19 @@
  *
  *	Initialize the route table for this bridge.
  */
-static int
+static void
 bridge_rtable_init(struct bridge_softc *sc)
 {
 	int i;
 
 	sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
-	    M_DEVBUF, M_NOWAIT);
-	if (sc->sc_rthash == NULL)
-		return (ENOMEM);
+	    M_DEVBUF, M_WAITOK);
 
 	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
 		LIST_INIT(&sc->sc_rthash[i]);
 
 	sc->sc_rthash_key = arc4random();
-
 	LIST_INIT(&sc->sc_rtlist);
-
-	return (0);
 }
 
 /*
@@ -2985,7 +3037,6 @@
 {
 	int snap, error, i, hlen;
 	struct ether_header *eh1, eh2;
-	struct ip_fw_args args;
 	struct ip *ip;
 	struct llc llc1;
 	u_int16_t ether_type;
@@ -3059,6 +3110,16 @@
 				goto bad;
 	}
 
+	/* Run the packet through pfil before stripping link headers */
+	if (PFIL_HOOKED(&V_link_pfil_hook) && pfil_ipfw != 0 &&
+			dir == PFIL_OUT && ifp != NULL) {
+
+		error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL);
+
+		if (*mp == NULL || error != 0) /* packet consumed by filter */
+			return (error);
+	}
+
 	/* Strip off the Ethernet header and keep a copy. */
 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
 	m_adj(*mp, ETHER_HDR_LEN);
@@ -3089,63 +3150,6 @@
 			goto bad;
 	}
 
-	/* XXX this section is also in if_ethersubr.c */
-	// XXX PFIL_OUT or DIR_OUT ?
-	if (V_ip_fw_chk_ptr && pfil_ipfw != 0 &&
-			dir == PFIL_OUT && ifp != NULL) {
-		struct m_tag *mtag;
-
-		error = -1;
-		/* fetch the start point from existing tags, if any */
-		mtag = m_tag_locate(*mp, MTAG_IPFW_RULE, 0, NULL);
-		if (mtag == NULL) {
-			args.rule.slot = 0;
-		} else {
-			struct ipfw_rule_ref *r;
-
-			/* XXX can we free the tag after use ? */
-			mtag->m_tag_id = PACKET_TAG_NONE;
-			r = (struct ipfw_rule_ref *)(mtag + 1);
-			/* packet already partially processed ? */
-			if (r->info & IPFW_ONEPASS)
-				goto ipfwpass;
-			args.rule = *r;
-		}
-
-		args.m = *mp;
-		args.oif = ifp;
-		args.next_hop = NULL;
-		args.next_hop6 = NULL;
-		args.eh = &eh2;
-		args.inp = NULL;	/* used by ipfw uid/gid/jail rules */
-		i = V_ip_fw_chk_ptr(&args);
-		*mp = args.m;
-
-		if (*mp == NULL)
-			return (error);
-
-		if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
-
-			/* put the Ethernet header back on */
-			M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
-			if (*mp == NULL)
-				return (error);
-			bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
-
-			/*
-			 * Pass the pkt to dummynet, which consumes it. The
-			 * packet will return to us via bridge_dummynet().
-			 */
-			args.oif = ifp;
-			ip_dn_io_ptr(mp, DIR_FWD | PROTO_IFB, &args);
-			return (error);
-		}
-
-		if (i != IP_FW_PASS) /* drop */
-			goto bad;
-	}
-
-ipfwpass:
 	error = 0;
 
 	/*
@@ -3154,15 +3158,6 @@
 	switch (ether_type) {
 	case ETHERTYPE_IP:
 		/*
-		 * before calling the firewall, swap fields the same as
-		 * IP does. here we assume the header is contiguous
-		 */
-		ip = mtod(*mp, struct ip *);
-
-		ip->ip_len = ntohs(ip->ip_len);
-		ip->ip_off = ntohs(ip->ip_off);
-
-		/*
 		 * Run pfil on the member interface and the bridge, both can
 		 * be skipped by clearing pfil_member or pfil_bridge.
 		 *
@@ -3191,16 +3186,18 @@
 			break;
 
 		/* check if we need to fragment the packet */
+		/* bridge_fragment generates a mbuf chain of packets */
+		/* that already include eth headers */
 		if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
 			i = (*mp)->m_pkthdr.len;
 			if (i > ifp->if_mtu) {
-				error = bridge_fragment(ifp, *mp, &eh2, snap,
+				error = bridge_fragment(ifp, mp, &eh2, snap,
 					    &llc1);
 				return (error);
 			}
 		}
 
-		/* Recalculate the ip checksum and restore byte ordering */
+		/* Recalculate the ip checksum. */
 		ip = mtod(*mp, struct ip *);
 		hlen = ip->ip_hl << 2;
 		if (hlen < sizeof(struct ip))
@@ -3212,8 +3209,6 @@
 			if (ip == NULL)
 				goto bad;
 		}
-		ip->ip_len = htons(ip->ip_len);
-		ip->ip_off = htons(ip->ip_off);
 		ip->ip_sum = 0;
 		if (hlen == sizeof(struct ip))
 			ip->ip_sum = in_cksum_hdr(ip);
@@ -3258,13 +3253,13 @@
 	 * Finally, put everything back the way it was and return
 	 */
 	if (snap) {
-		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
+		M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
 		if (*mp == NULL)
 			return (error);
 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
 	}
 
-	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+	M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
 	if (*mp == NULL)
 		return (error);
 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
@@ -3435,56 +3430,77 @@
 /*
  * bridge_fragment:
  *
- *	Return a fragmented mbuf chain.
+ *	Fragment mbuf chain in multiple packets and prepend ethernet header.
  */
 static int
-bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
+bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh,
     int snap, struct llc *llc)
 {
-	struct mbuf *m0;
+	struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL;
 	struct ip *ip;
 	int error = -1;
 
 	if (m->m_len < sizeof(struct ip) &&
 	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
-		goto out;
+		goto dropit;
 	ip = mtod(m, struct ip *);
 
-	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
-		    CSUM_DELAY_IP);
+	m->m_pkthdr.csum_flags |= CSUM_IP;
+	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist);
 	if (error)
-		goto out;
+		goto dropit;
 
-	/* walk the chain and re-add the Ethernet header */
-	for (m0 = m; m0; m0 = m0->m_nextpkt) {
-		if (error == 0) {
-			if (snap) {
-				M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
-				if (m0 == NULL) {
-					error = ENOBUFS;
-					continue;
-				}
-				bcopy(llc, mtod(m0, caddr_t),
-				    sizeof(struct llc));
-			}
-			M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
-			if (m0 == NULL) {
+	/*
+	 * Walk the chain and re-add the Ethernet header for
+	 * each mbuf packet.
+	 */
+	for (mcur = m; mcur; mcur = mcur->m_nextpkt) {
+		nextpkt = mcur->m_nextpkt;
+		mcur->m_nextpkt = NULL;
+		if (snap) {
+			M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT);
+			if (mcur == NULL) {
 				error = ENOBUFS;
-				continue;
+				if (mprev != NULL)
+					mprev->m_nextpkt = nextpkt;
+				goto dropit;
 			}
-			bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
-		} else 
-			m_freem(m);
+			bcopy(llc, mtod(mcur, caddr_t), sizeof(struct llc));
+		}
+
+		M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT);
+		if (mcur == NULL) {
+			error = ENOBUFS;
+			if (mprev != NULL)
+				mprev->m_nextpkt = nextpkt;
+			goto dropit;
+		}
+		bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN);
+
+		/*
+		 * The previous two M_PREPEND could have inserted one or two
+		 * mbufs in front so we have to update the previous packet's
+		 * m_nextpkt.
+		 */
+		mcur->m_nextpkt = nextpkt;
+		if (mprev != NULL)
+			mprev->m_nextpkt = mcur;
+		else {
+			/* The first mbuf in the original chain needs to be
+			 * updated. */
+			*mp = mcur;
+		}
+		mprev = mcur;
 	}
 
-	if (error == 0)
-		KMOD_IPSTAT_INC(ips_fragmented);
-
+	KMOD_IPSTAT_INC(ips_fragmented);
 	return (error);
 
-out:
-	if (m != NULL)
-		m_freem(m);
+dropit:
+	for (mcur = *mp; mcur; mcur = m) { /* droping the full packet chain */
+		m = mcur->m_nextpkt;
+		m_freem(mcur);
+	}
 	return (error);
 }
 

Modified: trunk/sys/net/if_bridgevar.h
===================================================================
--- trunk/sys/net/if_bridgevar.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_bridgevar.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -68,7 +68,7 @@
  *
  * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
  *
- * $FreeBSD: stable/9/sys/net/if_bridgevar.h 173320 2007-11-04 08:32:27Z thompsa $
+ * $FreeBSD: stable/10/sys/net/if_bridgevar.h 313066 2017-02-01 21:44:50Z kp $
  */
 
 /*
@@ -281,6 +281,7 @@
 #define BRIDGE_LOCK(_sc)		mtx_lock(&(_sc)->sc_mtx)
 #define BRIDGE_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_mtx)
 #define BRIDGE_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
+#define BRIDGE_UNLOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED)
 #define	BRIDGE_LOCK2REF(_sc, _err)	do {	\
 	mtx_assert(&(_sc)->sc_mtx, MA_OWNED);	\
 	if ((_sc)->sc_iflist_xcnt > 0)		\

Modified: trunk/sys/net/if_clone.c
===================================================================
--- trunk/sys/net/if_clone.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_clone.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,6 @@
 /* $MidnightBSD$ */
 /*-
+ * Copyright (c) 2012 Gleb Smirnoff <glebius at FreeBSD.org>
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -28,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if.c	8.5 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/if_clone.c 215701 2010-11-22 19:32:54Z dim $
+ * $FreeBSD: stable/10/sys/net/if_clone.c 324813 2017-10-21 10:48:06Z avos $
  */
 
 #include <sys/param.h>
@@ -43,19 +44,65 @@
 
 #include <net/if.h>
 #include <net/if_clone.h>
-#if 0
-#include <net/if_dl.h>
-#endif
-#include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/radix.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
+/* Current IF_MAXUNIT expands maximum to 5 characters. */
+#define	IFCLOSIZ	(IFNAMSIZ - 5)
+
+/*
+ * Structure describing a `cloning' interface.
+ *
+ * List of locks
+ * (c)		const until freeing
+ * (d)		driver specific data, may need external protection.
+ * (e)		locked by if_cloners_mtx
+ * (i)		locked by ifc_mtx mtx
+ */
+struct if_clone {
+	char ifc_name[IFCLOSIZ];	/* (c) Name of device, e.g. `gif' */
+	struct unrhdr *ifc_unrhdr;	/* (c) alloc_unr(9) header */
+	int ifc_maxunit;		/* (c) maximum unit number */
+	long ifc_refcnt;		/* (i) Reference count. */
+	LIST_HEAD(, ifnet) ifc_iflist;	/* (i) List of cloned interfaces */
+	struct mtx ifc_mtx;		/* Mutex to protect members. */
+
+	enum { SIMPLE, ADVANCED } ifc_type; /* (c) */
+
+	/* (c) Driver specific cloning functions.  Called with no locks held. */
+	union {
+		struct {	/* advanced cloner */
+			ifc_match_t	*_ifc_match;
+			ifc_create_t	*_ifc_create;
+			ifc_destroy_t	*_ifc_destroy;
+		} A;
+		struct {	/* simple cloner */
+			ifcs_create_t	*_ifcs_create;
+			ifcs_destroy_t	*_ifcs_destroy;
+			int		_ifcs_minifs;	/* minimum ifs */
+
+		} S;
+	} U;
+#define	ifc_match	U.A._ifc_match
+#define	ifc_create	U.A._ifc_create
+#define	ifc_destroy	U.A._ifc_destroy
+#define	ifcs_create	U.S._ifcs_create
+#define	ifcs_destroy	U.S._ifcs_destroy
+#define	ifcs_minifs	U.S._ifcs_minifs
+
+	LIST_ENTRY(if_clone) ifc_list;	/* (e) On list of cloners */
+};
+
 static void	if_clone_free(struct if_clone *ifc);
 static int	if_clone_createif(struct if_clone *ifc, char *name, size_t len,
 		    caddr_t params);
 
+static int     ifc_simple_match(struct if_clone *, const char *);
+static int     ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
+static int     ifc_simple_destroy(struct if_clone *, struct ifnet *);
+
 static struct mtx	if_cloners_mtx;
 static VNET_DEFINE(int, if_cloners_count);
 VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners);
@@ -139,18 +186,25 @@
 
 	/* Try to find an applicable cloner for this request */
 	IF_CLONERS_LOCK();
-	LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
-		if (ifc->ifc_match(ifc, name)) {
-			break;
+	LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+		if (ifc->ifc_type == SIMPLE) {
+			if (ifc_simple_match(ifc, name))
+				break;
+		} else {
+			if (ifc->ifc_match(ifc, name))
+				break;
 		}
-	}
 #ifdef VIMAGE
 	if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
 		CURVNET_SET_QUIET(vnet0);
-		LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
-			if (ifc->ifc_match(ifc, name))
-				break;
-		}
+		LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+			if (ifc->ifc_type == SIMPLE) {
+				if (ifc_simple_match(ifc, name))
+					break;
+			} else {
+				if (ifc->ifc_match(ifc, name))
+					break;
+			}
 		CURVNET_RESTORE();
 	}
 #endif
@@ -174,7 +228,10 @@
 	if (ifunit(name) != NULL)
 		return (EEXIST);
 
-	err = (*ifc->ifc_create)(ifc, name, len, params);
+	if (ifc->ifc_type == SIMPLE)
+		err = ifc_simple_create(ifc, name, len, params);
+	else
+		err = (*ifc->ifc_create)(ifc, name, len, params);
 	
 	if (!err) {
 		ifp = ifunit(name);
@@ -215,10 +272,14 @@
 #ifdef VIMAGE
 	if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
 		CURVNET_SET_QUIET(vnet0);
-		LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
-			if (ifc->ifc_match(ifc, name))
-				break;
-		}
+		LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+			if (ifc->ifc_type == SIMPLE) {
+				if (ifc_simple_match(ifc, name))
+					break;
+			} else {
+				if (ifc->ifc_match(ifc, name))
+					break;
+			}
 		CURVNET_RESTORE();
 	}
 #endif
@@ -242,7 +303,7 @@
 	int err;
 	struct ifnet *ifcifp;
 
-	if (ifc->ifc_destroy == NULL)
+	if (ifc->ifc_type == ADVANCED && ifc->ifc_destroy == NULL)
 		return(EOPNOTSUPP);
 
 	/*
@@ -267,7 +328,10 @@
 
 	if_delgroup(ifp, ifc->ifc_name);
 
-	err =  (*ifc->ifc_destroy)(ifc, ifp);
+	if (ifc->ifc_type == SIMPLE)
+		err = ifc_simple_destroy(ifc, ifp);
+	else
+		err = (*ifc->ifc_destroy)(ifc, ifp);
 
 	if (err != 0) {
 		if_addgroup(ifp, ifc->ifc_name);
@@ -280,38 +344,99 @@
 	return (err);
 }
 
-/*
- * Register a network interface cloner.
- */
-void
-if_clone_attach(struct if_clone *ifc)
+static struct if_clone *
+if_clone_alloc(const char *name, int maxunit)
 {
-	int len, maxclone;
+	struct if_clone *ifc;
 
-	/*
-	 * Compute bitmap size and allocate it.
-	 */
-	maxclone = ifc->ifc_maxunit + 1;
-	len = maxclone >> 3;
-	if ((len << 3) < maxclone)
-		len++;
-	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
-	ifc->ifc_bmlen = len;
+	KASSERT(name != NULL, ("%s: no name\n", __func__));
+
+	ifc = malloc(sizeof(struct if_clone), M_CLONE, M_WAITOK | M_ZERO);
+	strncpy(ifc->ifc_name, name, IFCLOSIZ-1);
 	IF_CLONE_LOCK_INIT(ifc);
 	IF_CLONE_ADDREF(ifc);
+	ifc->ifc_maxunit = maxunit ? maxunit : IF_MAXUNIT;
+	ifc->ifc_unrhdr = new_unrhdr(0, ifc->ifc_maxunit, &ifc->ifc_mtx);
+	LIST_INIT(&ifc->ifc_iflist);
 
+	return (ifc);
+}
+	
+static int
+if_clone_attach(struct if_clone *ifc)
+{
+	struct if_clone *ifc1;
+
 	IF_CLONERS_LOCK();
+	LIST_FOREACH(ifc1, &V_if_cloners, ifc_list)
+		if (strcmp(ifc->ifc_name, ifc1->ifc_name) == 0) {
+			IF_CLONERS_UNLOCK();
+			IF_CLONE_REMREF(ifc);
+			return (EEXIST);
+		}
 	LIST_INSERT_HEAD(&V_if_cloners, ifc, ifc_list);
 	V_if_cloners_count++;
 	IF_CLONERS_UNLOCK();
 
-	LIST_INIT(&ifc->ifc_iflist);
+	return (0);
+}
 
-	if (ifc->ifc_attach != NULL)
-		(*ifc->ifc_attach)(ifc);
+struct if_clone *
+if_clone_advanced(const char *name, u_int maxunit, ifc_match_t match,
+	ifc_create_t create, ifc_destroy_t destroy)
+{
+	struct if_clone *ifc;
+
+	ifc = if_clone_alloc(name, maxunit);
+	ifc->ifc_type = ADVANCED;
+	ifc->ifc_match = match;
+	ifc->ifc_create = create;
+	ifc->ifc_destroy = destroy;
+
+	if (if_clone_attach(ifc) != 0) {
+		if_clone_free(ifc);
+		return (NULL);
+	}
+
 	EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+	return (ifc);
 }
 
+struct if_clone *
+if_clone_simple(const char *name, ifcs_create_t create, ifcs_destroy_t destroy,
+	u_int minifs)
+{
+	struct if_clone *ifc;
+	u_int unit;
+
+	ifc = if_clone_alloc(name, 0);
+	ifc->ifc_type = SIMPLE;
+	ifc->ifcs_create = create;
+	ifc->ifcs_destroy = destroy;
+	ifc->ifcs_minifs = minifs;
+
+	if (if_clone_attach(ifc) != 0) {
+		if_clone_free(ifc);
+		return (NULL);
+	}
+
+	for (unit = 0; unit < minifs; unit++) {
+		char name[IFNAMSIZ];
+		int error;
+
+		snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
+		error = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
+		KASSERT(error == 0,
+		    ("%s: failed to create required interface %s",
+		    __func__, name));
+	}
+
+	EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+	return (ifc);
+}
+
 /*
  * Unregister a network interface cloner.
  */
@@ -318,7 +443,6 @@
 void
 if_clone_detach(struct if_clone *ifc)
 {
-	struct ifc_simple_data *ifcs = ifc->ifc_data;
 
 	IF_CLONERS_LOCK();
 	LIST_REMOVE(ifc, ifc_list);
@@ -326,8 +450,8 @@
 	IF_CLONERS_UNLOCK();
 
 	/* Allow all simples to be destroyed */
-	if (ifc->ifc_attach == ifc_simple_attach)
-		ifcs->ifcs_minifs = 0;
+	if (ifc->ifc_type == SIMPLE)
+		ifc->ifcs_minifs = 0;
 
 	/* destroy all interfaces for this cloner */
 	while (!LIST_EMPTY(&ifc->ifc_iflist))
@@ -339,16 +463,13 @@
 static void
 if_clone_free(struct if_clone *ifc)
 {
-	for (int bytoff = 0; bytoff < ifc->ifc_bmlen; bytoff++) {
-		KASSERT(ifc->ifc_units[bytoff] == 0x00,
-		    ("ifc_units[%d] is not empty", bytoff));
-	}
 
 	KASSERT(LIST_EMPTY(&ifc->ifc_iflist),
 	    ("%s: ifc_iflist not empty", __func__));
 
 	IF_CLONE_LOCK_DESTROY(ifc);
-	free(ifc->ifc_units, M_CLONE);
+	delete_unrhdr(ifc->ifc_unrhdr);
+	free(ifc, M_CLONE);
 }
 
 /*
@@ -405,6 +526,49 @@
 }
 
 /*
+ * if_clone_findifc() looks up ifnet from the current
+ * cloner list, and returns ifc if found.  Note that ifc_refcnt
+ * is incremented.
+ */
+struct if_clone *
+if_clone_findifc(struct ifnet *ifp)
+{
+	struct if_clone *ifc, *ifc0;
+	struct ifnet *ifcifp;
+
+	ifc0 = NULL;
+	IF_CLONERS_LOCK();
+	LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+		IF_CLONE_LOCK(ifc);
+		LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) {
+			if (ifp == ifcifp) {
+				ifc0 = ifc;
+				IF_CLONE_ADDREF_LOCKED(ifc);
+				break;
+			}
+		}
+		IF_CLONE_UNLOCK(ifc);
+		if (ifc0 != NULL)
+			break;
+	}
+	IF_CLONERS_UNLOCK();
+
+	return (ifc0);
+}
+
+/*
+ * if_clone_addgroup() decrements ifc_refcnt because it is called after
+ * if_clone_findifc().
+ */
+void
+if_clone_addgroup(struct ifnet *ifp, struct if_clone *ifc)
+{
+
+	if_addgroup(ifp, ifc->ifc_name);
+	IF_CLONE_REMREF(ifc);
+}
+
+/*
  * A utility function to extract unit numbers from interface names of
  * the form name###.
  *
@@ -439,101 +603,67 @@
 	return (0);
 }
 
-int
-ifc_alloc_unit(struct if_clone *ifc, int *unit)
+static int
+ifc_alloc_unit_specific(struct if_clone *ifc, int *unit)
 {
-	int wildcard, bytoff, bitoff;
-	int err = 0;
+	char name[IFNAMSIZ];
 
-	IF_CLONE_LOCK(ifc);
+	if (*unit > ifc->ifc_maxunit)
+		return (ENOSPC);
 
-	bytoff = bitoff = 0;
-	wildcard = (*unit < 0);
-	/*
-	 * Find a free unit if none was given.
-	 */
-	if (wildcard) {
-		while ((bytoff < ifc->ifc_bmlen)
-		    && (ifc->ifc_units[bytoff] == 0xff))
-			bytoff++;
-		if (bytoff >= ifc->ifc_bmlen) {
-			err = ENOSPC;
-			goto done;
-		}
-		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
-			bitoff++;
-		*unit = (bytoff << 3) + bitoff;
-	}
+	if (alloc_unr_specific(ifc->ifc_unrhdr, *unit) == -1)
+		return (EEXIST);
 
-	if (*unit > ifc->ifc_maxunit) {
-		err = ENOSPC;
-		goto done;
+	snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, *unit);
+	if (ifunit(name) != NULL) {
+		free_unr(ifc->ifc_unrhdr, *unit);
+		return (EEXIST);
 	}
 
-	if (!wildcard) {
-		bytoff = *unit >> 3;
-		bitoff = *unit - (bytoff << 3);
-	}
+	IF_CLONE_ADDREF(ifc);
 
-	if((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) {
-		err = EEXIST;
-		goto done;
-	}
-	/*
-	 * Allocate the unit in the bitmap.
-	 */
-	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
-	    ("%s: bit is already set", __func__));
-	ifc->ifc_units[bytoff] |= (1 << bitoff);
-	IF_CLONE_ADDREF_LOCKED(ifc);
-
-done:
-	IF_CLONE_UNLOCK(ifc);
-	return (err);
+	return (0);
 }
 
-void
-ifc_free_unit(struct if_clone *ifc, int unit)
+static int
+ifc_alloc_unit_next(struct if_clone *ifc, int *unit)
 {
-	int bytoff, bitoff;
+	int error;
 
+	*unit = alloc_unr(ifc->ifc_unrhdr);
+	if (*unit == -1)
+		return (ENOSPC);
 
-	/*
-	 * Compute offset in the bitmap and deallocate the unit.
-	 */
-	bytoff = unit >> 3;
-	bitoff = unit - (bytoff << 3);
+	free_unr(ifc->ifc_unrhdr, *unit);
+	for (;;) {
+		error = ifc_alloc_unit_specific(ifc, unit);
+		if (error != EEXIST)
+			break;
 
-	IF_CLONE_LOCK(ifc);
-	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
-	    ("%s: bit is already cleared", __func__));
-	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
-	IF_CLONE_REMREF_LOCKED(ifc);	/* releases lock */
+		(*unit)++;
+	}
+
+	return (error);
 }
 
+int
+ifc_alloc_unit(struct if_clone *ifc, int *unit)
+{
+	if (*unit < 0)
+		return (ifc_alloc_unit_next(ifc, unit));
+	else
+		return (ifc_alloc_unit_specific(ifc, unit));
+}
+
 void
-ifc_simple_attach(struct if_clone *ifc)
+ifc_free_unit(struct if_clone *ifc, int unit)
 {
-	int err;
-	int unit;
-	char name[IFNAMSIZ];
-	struct ifc_simple_data *ifcs = ifc->ifc_data;
 
-	KASSERT(ifcs->ifcs_minifs - 1 <= ifc->ifc_maxunit,
-	    ("%s: %s requested more units than allowed (%d > %d)",
-	    __func__, ifc->ifc_name, ifcs->ifcs_minifs,
-	    ifc->ifc_maxunit + 1));
-
-	for (unit = 0; unit < ifcs->ifcs_minifs; unit++) {
-		snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
-		err = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
-		KASSERT(err == 0,
-		    ("%s: failed to create required interface %s",
-		    __func__, name));
-	}
+	free_unr(ifc->ifc_unrhdr, unit);
+	IF_CLONE_REMREF(ifc);
 }
 
-int
+static int
 ifc_simple_match(struct if_clone *ifc, const char *name)
 {
 	const char *cp;
@@ -554,7 +684,7 @@
 	return (1);
 }
 
-int
+static int
 ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	char *dp;
@@ -561,7 +691,6 @@
 	int wildcard;
 	int unit;
 	int err;
-	struct ifc_simple_data *ifcs = ifc->ifc_data;
 
 	err = ifc_name2unit(name, &unit);
 	if (err != 0)
@@ -573,7 +702,7 @@
 	if (err != 0)
 		return (err);
 
-	err = ifcs->ifcs_create(ifc, unit, params);
+	err = ifc->ifcs_create(ifc, unit, params);
 	if (err != 0) {
 		ifc_free_unit(ifc, unit);
 		return (err);
@@ -597,18 +726,17 @@
 	return (0);
 }
 
-int
+static int
 ifc_simple_destroy(struct if_clone *ifc, struct ifnet *ifp)
 {
 	int unit;
-	struct ifc_simple_data *ifcs = ifc->ifc_data;
 
 	unit = ifp->if_dunit;
 
-	if (unit < ifcs->ifcs_minifs) 
+	if (unit < ifc->ifcs_minifs) 
 		return (EINVAL);
 
-	ifcs->ifcs_destroy(ifp);
+	ifc->ifcs_destroy(ifp);
 
 	ifc_free_unit(ifc, unit);
 

Modified: trunk/sys/net/if_clone.h
===================================================================
--- trunk/sys/net/if_clone.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_clone.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	From: @(#)if.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_clone.h 195837 2009-07-23 20:46:49Z rwatson $
+ * $FreeBSD: stable/10/sys/net/if_clone.h 285824 2015-07-23 19:57:47Z hrs $
  */
 
 #ifndef	_NET_IF_CLONE_H_
@@ -36,82 +36,44 @@
 
 #ifdef _KERNEL
 
-#define IFC_CLONE_INITIALIZER(name, data, maxunit,			\
-    attach, match, create, destroy)					\
-    { { 0 }, name, maxunit, NULL, 0, data, attach, match, create, destroy }
+struct if_clone;
 
-/*
- * Structure describing a `cloning' interface.
- *
- * List of locks
- * (c)		const until freeing
- * (d)		driver specific data, may need external protection.
- * (e)		locked by if_cloners_mtx
- * (i)		locked by ifc_mtx mtx
- */
-struct if_clone {
-	LIST_ENTRY(if_clone) ifc_list;	/* (e) On list of cloners */
-	const char *ifc_name;		/* (c) Name of device, e.g. `gif' */
-	int ifc_maxunit;		/* (c) Maximum unit number */
-	unsigned char *ifc_units;	/* (i) Bitmap to handle units. */
-					/*     Considered private, access */
-					/*     via ifc_(alloc|free)_unit(). */
-	int ifc_bmlen;			/* (c) Bitmap length. */
-	void *ifc_data;			/* (*) Data for ifc_* functions. */
+/* Methods. */
+typedef int	ifc_match_t(struct if_clone *, const char *);
+typedef int	ifc_create_t(struct if_clone *, char *, size_t, caddr_t);
+typedef int	ifc_destroy_t(struct if_clone *, struct ifnet *);
 
-	/* (c) Driver specific cloning functions.  Called with no locks held. */
-	void	(*ifc_attach)(struct if_clone *);
-	int	(*ifc_match)(struct if_clone *, const char *);
-	int	(*ifc_create)(struct if_clone *, char *, size_t, caddr_t);
-	int	(*ifc_destroy)(struct if_clone *, struct ifnet *);
+typedef int	ifcs_create_t(struct if_clone *, int, caddr_t);
+typedef void	ifcs_destroy_t(struct ifnet *);
 
-	long ifc_refcnt;		/* (i) Refrence count. */
-	struct mtx ifc_mtx;		/* Muted to protect members. */
-	LIST_HEAD(, ifnet) ifc_iflist;	/* (i) List of cloned interfaces */
-};
-
-void	if_clone_init(void);
-void	if_clone_attach(struct if_clone *);
+/* Interface cloner (de)allocating functions. */
+struct if_clone *
+	if_clone_advanced(const char *, u_int, ifc_match_t, ifc_create_t,
+		      ifc_destroy_t);
+struct if_clone *
+	if_clone_simple(const char *, ifcs_create_t, ifcs_destroy_t, u_int);
 void	if_clone_detach(struct if_clone *);
-void	vnet_if_clone_init(void);
 
-int	if_clone_create(char *, size_t, caddr_t);
-int	if_clone_destroy(const char *);
-int	if_clone_destroyif(struct if_clone *, struct ifnet *);
-int	if_clone_list(struct if_clonereq *);
-
+/* Unit (de)allocating functions. */
 int	ifc_name2unit(const char *name, int *unit);
 int	ifc_alloc_unit(struct if_clone *, int *);
 void	ifc_free_unit(struct if_clone *, int);
 
-/*
- * The ifc_simple functions, structures, and macros implement basic
- * cloning as in 5.[012].
- */
-
-struct ifc_simple_data {
-	int ifcs_minifs;		/* minimum number of interfaces */
-
-	int	(*ifcs_create)(struct if_clone *, int, caddr_t);
-	void	(*ifcs_destroy)(struct ifnet *);
-};
-
-/* interface clone event */
+/* Interface clone event. */
 typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
 EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);
 
-#define IFC_SIMPLE_DECLARE(name, minifs)				\
-struct ifc_simple_data name##_cloner_data =				\
-    {minifs, name##_clone_create, name##_clone_destroy};		\
-struct if_clone name##_cloner =						\
-    IFC_CLONE_INITIALIZER(#name, &name##_cloner_data, IF_MAXUNIT,	\
-    ifc_simple_attach, ifc_simple_match, ifc_simple_create, ifc_simple_destroy)
+/* The below interfaces used only by net/if.c. */
+void	if_clone_init(void);
+void	vnet_if_clone_init(void);
+int	if_clone_create(char *, size_t, caddr_t);
+int	if_clone_destroy(const char *);
+int	if_clone_list(struct if_clonereq *);
+struct if_clone *if_clone_findifc(struct ifnet *);
+void	if_clone_addgroup(struct ifnet *, struct if_clone *);
 
-void	ifc_simple_attach(struct if_clone *);
-int	ifc_simple_match(struct if_clone *, const char *);
-int	ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
-int	ifc_simple_destroy(struct if_clone *, struct ifnet *);
+/* The below interface used only by epair(4). */
+int	if_clone_destroyif(struct if_clone *, struct ifnet *);
 
 #endif /* _KERNEL */
-
 #endif /* !_NET_IF_CLONE_H_ */

Modified: trunk/sys/net/if_dead.c
===================================================================
--- trunk/sys/net/if_dead.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_dead.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_dead.c 199975 2009-11-30 21:25:57Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_dead.c 249925 2013-04-26 12:50:32Z glebius $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -43,7 +43,7 @@
 #include <net/if_var.h>
 
 static int
-ifdead_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ifdead_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
     struct route *ro)
 {
 

Modified: trunk/sys/net/if_debug.c
===================================================================
--- trunk/sys/net/if_debug.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_debug.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_debug.c 223735 2011-07-03 12:22:02Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_debug.c 223735 2011-07-03 12:22:02Z bz $");
 
 #include "opt_ddb.h"
 

Modified: trunk/sys/net/if_disc.c
===================================================================
--- trunk/sys/net/if_disc.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_disc.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	From: @(#)if_loop.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_disc.c 191148 2009-04-16 20:30:28Z kmacy $
+ * $FreeBSD: stable/10/sys/net/if_disc.c 263478 2014-03-21 15:15:30Z glebius $
  */
 
 /*
@@ -60,22 +60,21 @@
 #define DSMTU	65532
 #endif
 
-#define DISCNAME	"disc"
-
 struct disc_softc {
 	struct ifnet *sc_ifp;
 };
 
 static int	discoutput(struct ifnet *, struct mbuf *,
-		    struct sockaddr *, struct route *);
+		    const struct sockaddr *, struct route *);
 static void	discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int	discioctl(struct ifnet *, u_long, caddr_t);
 static int	disc_clone_create(struct if_clone *, int, caddr_t);
 static void	disc_clone_destroy(struct ifnet *);
 
-static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface");
+static const char discname[] = "disc";
+static MALLOC_DEFINE(M_DISC, discname, "Discard interface");
 
-IFC_SIMPLE_DECLARE(disc, 0);
+static struct if_clone *disc_cloner;
 
 static int
 disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
@@ -91,7 +90,7 @@
 	}
 
 	ifp->if_softc = sc;
-	if_initname(ifp, ifc->ifc_name, unit);
+	if_initname(ifp, discname, unit);
 	ifp->if_mtu = DSMTU;
 	/*
 	 * IFF_LOOPBACK should not be removed from disc's flags because
@@ -136,10 +135,11 @@
 
 	switch (type) {
 	case MOD_LOAD:
-		if_clone_attach(&disc_cloner);
+		disc_cloner = if_clone_simple(discname, disc_clone_create,
+		    disc_clone_destroy, 0);
 		break;
 	case MOD_UNLOAD:
-		if_clone_detach(&disc_cloner);
+		if_clone_detach(disc_cloner);
 		break;
 	default:
 		return (EOPNOTSUPP);
@@ -156,7 +156,7 @@
 DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 static int
-discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	u_int32_t af;
@@ -164,15 +164,14 @@
 	M_ASSERTPKTHDR(m);
 
 	/* BPF writes need to be handled specially. */
-	if (dst->sa_family == AF_UNSPEC) {
+	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af;
-	}
+	else
+		af = dst->sa_family;
 
-	if (bpf_peers_present(ifp->if_bpf)) {
-		u_int af = dst->sa_family;
+	if (bpf_peers_present(ifp->if_bpf))
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
-	}
+
 	m->m_pkthdr.rcvif = ifp;
 
 	ifp->if_opackets++;
@@ -187,7 +186,7 @@
 discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
 {
 	RT_LOCK_ASSERT(rt);
-	rt->rt_rmx.rmx_mtu = DSMTU;
+	rt->rt_mtu = DSMTU;
 }
 
 /*

Modified: trunk/sys/net/if_dl.h
===================================================================
--- trunk/sys/net/if_dl.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_dl.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_dl.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_dl.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/if_dl.h 235640 2012-05-19 02:39:43Z marcel $
  */
 
 #ifndef _NET_IF_DL_H_
@@ -68,6 +68,7 @@
 };
 
 #define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+#define LLINDEX(s) ((s)->sdl_index)
 
 #ifndef _KERNEL
 

Modified: trunk/sys/net/if_edsc.c
===================================================================
--- trunk/sys/net/if_edsc.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_edsc.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	From: @(#)if_loop.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_edsc.c 167896 2007-03-26 09:05:10Z yar $
+ * $FreeBSD: stable/10/sys/net/if_edsc.c 241610 2012-10-16 13:37:54Z glebius $
  */
 
 /*
@@ -52,6 +52,8 @@
 #include <net/if_types.h>	/* IFT_ETHER and friends */
 #include <net/if_var.h>		/* kernel-only part of ifnet(9) */
 
+static const char edscname[] = "edsc";
+
 /*
  * Software configuration of an interface specific to this device type.
  */
@@ -65,9 +67,9 @@
 };
 
 /*
- * Simple cloning methods.
- * IFC_SIMPLE_DECLARE() expects precisely these names.
+ * Attach to the interface cloning framework.
  */
+static struct if_clone *edsc_cloner;
 static int	edsc_clone_create(struct if_clone *, int, caddr_t);
 static void	edsc_clone_destroy(struct ifnet *);
 
@@ -82,17 +84,9 @@
 /*
  * We'll allocate softc instances from this.
  */
-static		MALLOC_DEFINE(M_EDSC, "edsc", "Ethernet discard interface");
+static		MALLOC_DEFINE(M_EDSC, edscname, "Ethernet discard interface");
 
 /*
- * Attach to the interface cloning framework under the name of "edsc".
- * The second argument is the number of units to be created from
- * the outset.  It's also the minimum number of units allowed.
- * We don't want any units created as soon as the driver is loaded.
- */
-IFC_SIMPLE_DECLARE(edsc, 0);
-
-/*
  * Create an interface instance.
  */
 static int
@@ -117,7 +111,7 @@
 	/*
 	 * Get a name for this particular interface in its ifnet structure.
 	 */
-	if_initname(ifp, ifc->ifc_name, unit);
+	if_initname(ifp, edscname, unit);
 
 	/*
 	 * Typical Ethernet interface flags: we can do broadcast and
@@ -324,8 +318,13 @@
 	case MOD_LOAD:
 		/*
 		 * Connect to the network interface cloning framework.
+		 * The last argument is the number of units to be created
+		 * from the outset.  It's also the minimum number of units
+		 * allowed.  We don't want any units created as soon as the
+		 * driver is loaded.
 		 */
-		if_clone_attach(&edsc_cloner);
+		edsc_cloner = if_clone_simple(edscname, edsc_clone_create,
+		    edsc_clone_destroy, 0);
 		break;
 
 	case MOD_UNLOAD:
@@ -333,7 +332,7 @@
 		 * Disconnect from the cloning framework.
 		 * Existing interfaces will be disposed of properly.
 		 */
-		if_clone_detach(&edsc_cloner);
+		if_clone_detach(edsc_cloner);
 		break;
 
 	default:

Modified: trunk/sys/net/if_ef.c
===================================================================
--- trunk/sys/net/if_ef.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_ef.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_ef.c 207554 2010-05-03 07:32:50Z sobomax $
+ * $FreeBSD: stable/10/sys/net/if_ef.c 249925 2013-04-26 12:50:32Z glebius $
  */
 
 #include "opt_inet.h"
@@ -103,7 +103,7 @@
 
 extern int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
 extern int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
-		struct sockaddr *dst, short *tp, int *hlen);
+		const struct sockaddr *dst, short *tp, int *hlen);
 
 /*
 static void ef_reset (struct ifnet *);
@@ -115,7 +115,7 @@
 static void ef_start(struct ifnet *);
 static int ef_input(struct ifnet*, struct ether_header *, struct mbuf *);
 static int ef_output(struct ifnet *ifp, struct mbuf **mp,
-		struct sockaddr *dst, short *tp, int *hlen);
+		const struct sockaddr *dst, short *tp, int *hlen);
 
 static int ef_load(void);
 static int ef_unload(void);
@@ -152,14 +152,10 @@
 ef_detach(struct efnet *sc)
 {
 	struct ifnet *ifp = sc->ef_ifp;
-	int s;
 
-	s = splimp();
-
 	ether_ifdetach(ifp);
 	if_free(ifp);
 
-	splx(s);
 	return 0;
 }
 
@@ -173,11 +169,10 @@
 {
 	struct efnet *sc = ifp->if_softc;
 	struct ifaddr *ifa = (struct ifaddr*)data;
-	int s, error;
+	int error;
 
 	EFDEBUG("IOCTL %ld for %s\n", cmd, ifp->if_xname);
 	error = 0;
-	s = splimp();
 	switch (cmd) {
 	    case SIOCSIFFLAGS:
 		error = 0;
@@ -194,7 +189,6 @@
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
-	splx(s);
 	return error;
 }
 
@@ -393,8 +387,8 @@
 }
 
 static int
-ef_output(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, short *tp,
-	int *hlen)
+ef_output(struct ifnet *ifp, struct mbuf **mp, const struct sockaddr *dst,
+	short *tp, int *hlen)
 {
 	struct efnet *sc = (struct efnet*)ifp->if_softc;
 	struct mbuf *m = *mp;
@@ -415,7 +409,7 @@
 		type = htons(m->m_pkthdr.len);
 		break;
 	    case ETHER_FT_8022:
-		M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAIT);
+		M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAITOK);
 		/*
 		 * Ensure that ethernet header and next three bytes
 		 * will fit into single mbuf
@@ -434,7 +428,7 @@
 		*hlen += 3;
 		break;
 	    case ETHER_FT_SNAP:
-		M_PREPEND(m, 8, M_WAIT);
+		M_PREPEND(m, 8, M_WAITOK);
 		type = htons(m->m_pkthdr.len);
 		cp = mtod(m, u_char *);
 		bcopy("\xAA\xAA\x03\x00\x00\x00\x81\x37", cp, 8);

Modified: trunk/sys/net/if_enc.c
===================================================================
--- trunk/sys/net/if_enc.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_enc.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_enc.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_enc.c 255926 2013-09-28 14:14:23Z glebius $
  */
 
 #include "opt_inet.h"
@@ -89,12 +89,12 @@
 
 static int	enc_ioctl(struct ifnet *, u_long, caddr_t);
 static int	enc_output(struct ifnet *ifp, struct mbuf *m,
-		    struct sockaddr *dst, struct route *ro);
+		    const struct sockaddr *dst, struct route *ro);
 static int	enc_clone_create(struct if_clone *, int, caddr_t);
 static void	enc_clone_destroy(struct ifnet *);
+static struct if_clone *enc_cloner;
+static const char encname[] = "enc";
 
-IFC_SIMPLE_DECLARE(enc, 1);
-
 /*
  * Sysctls.
  */
@@ -144,7 +144,7 @@
 		return (ENOSPC);
 	}
 
-	if_initname(ifp, ifc->ifc_name, unit);
+	if_initname(ifp, encname, unit);
 	ifp->if_mtu = ENCMTU;
 	ifp->if_ioctl = enc_ioctl;
 	ifp->if_output = enc_output;
@@ -168,7 +168,8 @@
 	switch (type) {
 	case MOD_LOAD:
 		mtx_init(&enc_mtx, "enc mtx", NULL, MTX_DEF);
-		if_clone_attach(&enc_cloner);
+		enc_cloner = if_clone_simple(encname, enc_clone_create,
+		    enc_clone_destroy, 1);
 		break;
 	case MOD_UNLOAD:
 		printf("enc module unload - not possible for this module\n");
@@ -188,7 +189,7 @@
 DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
 
 static int
-enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+enc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	m_freem(m);
@@ -271,23 +272,8 @@
 	switch (ip->ip_v) {
 #ifdef INET
 		case 4:
-			/*
-			 * before calling the firewall, swap fields the same as
-			 * IP does. here we assume the header is contiguous
-			 */
-			ip->ip_len = ntohs(ip->ip_len);
-			ip->ip_off = ntohs(ip->ip_off);
-
 			error = pfil_run_hooks(&V_inet_pfil_hook, mp,
 			    encif, dir, NULL);
-
-			if (*mp == NULL || error != 0)
-				break;
-
-			/* restore byte ordering */
-			ip = mtod(*mp, struct ip *);
-			ip->ip_len = htons(ip->ip_len);
-			ip->ip_off = htons(ip->ip_off);
 			break;
 #endif
 #ifdef INET6

Modified: trunk/sys/net/if_enc.h
===================================================================
--- trunk/sys/net/if_enc.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_enc.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_enc.h 181627 2008-08-12 09:05:01Z vanhu $
+ * $FreeBSD: stable/10/sys/net/if_enc.h 181627 2008-08-12 09:05:01Z vanhu $
  */
 
 #ifndef _NET_IF_ENC_H

Modified: trunk/sys/net/if_epair.c
===================================================================
--- trunk/sys/net/if_epair.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_epair.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -49,7 +49,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_epair.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_epair.c 287594 2015-09-09 08:52:39Z hrs $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -73,8 +73,6 @@
 #include <net/netisr.h>
 #include <net/vnet.h>
 
-#define	EPAIRNAME	"epair"
-
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
 
@@ -101,9 +99,11 @@
 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int epair_clone_destroy(struct if_clone *, struct ifnet *);
 
-/* Netisr realted definitions and sysctl. */
+static const char epairname[] = "epair";
+
+/* Netisr related definitions and sysctl. */
 static struct netisr_handler epair_nh = {
-	.nh_name	= EPAIRNAME,
+	.nh_name	= epairname,
 	.nh_proto	= NETISR_EPAIR,
 	.nh_policy	= NETISR_POLICY_CPU,
 	.nh_handler	= epair_nh_sintr,
@@ -169,12 +169,11 @@
 #define	EPAIR_REFCOUNT_ASSERT(a, p)
 #endif
 
-static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
+static MALLOC_DEFINE(M_EPAIR, epairname,
     "Pair of virtual cross-over connected Ethernet-like interfaces");
 
-static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
-    EPAIRNAME, NULL, IF_MAXUNIT,
-    NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
+static VNET_DEFINE(struct if_clone *, epair_cloner);
+#define	V_epair_cloner	VNET(epair_cloner)
 
 /*
  * DPCPU area and functions.
@@ -693,10 +692,10 @@
 	 * - epair<n>
 	 * but not the epair<n>[ab] versions.
 	 */
-	if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
+	if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
 		return (0);
 
-	for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
+	for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
 		if (*cp < '0' || *cp > '9')
 			return (0);
 	}
@@ -715,7 +714,7 @@
 
 	/*
 	 * We are abusing params to create our second interface.
-	 * Actually we already created it and called if_clone_createif()
+	 * Actually we already created it and called if_clone_create()
 	 * for it to do the official insertion procedure the moment we knew
 	 * it cannot fail anymore. So just do attach it here.
 	 */
@@ -762,10 +761,17 @@
 		ifc_free_unit(ifc, unit);
 		return (ENOSPC);
 	}
-	*dp = 'a';
+	*dp = 'b';
 	/* Must not change dp so we can replace 'a' by 'b' later. */
 	*(dp+1) = '\0';
 
+	/* Check if 'a' and 'b' interfaces already exist. */
+	if (ifunit(name) != NULL)
+		return (EEXIST);
+	*dp = 'a';
+	if (ifunit(name) != NULL)
+		return (EEXIST);
+
 	/* Allocate memory for both [ab] interfaces */
 	sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
 	EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
@@ -803,12 +809,20 @@
 	    netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
 	scb->cpuid =
 	    netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
+
+	/* Initialise pseudo media types. */
+	ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
+	ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+	ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
+	ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
+	ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+	ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
 	
 	/* Finish initialization of interface <n>a. */
 	ifp = sca->ifp;
 	ifp->if_softc = sca;
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
-	ifp->if_dname = ifc->ifc_name;
+	ifp->if_dname = epairname;
 	ifp->if_dunit = unit;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -826,7 +840,7 @@
 	sca->if_qflush = ifp->if_qflush;
 	ifp->if_qflush = epair_qflush;
 	ifp->if_transmit = epair_transmit;
-	ifp->if_baudrate = IF_Gbps(10UL);	/* arbitrary maximum */
+	if_initbaudrate(ifp, IF_Gbps(10));	/* arbitrary maximum */
 
 	/* Swap the name and finish initialization of interface <n>b. */
 	*dp = 'b';
@@ -834,7 +848,7 @@
 	ifp = scb->ifp;
 	ifp->if_softc = scb;
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
-	ifp->if_dname = ifc->ifc_name;
+	ifp->if_dname = epairname;
 	ifp->if_dunit = unit;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -844,15 +858,15 @@
 	ifp->if_init  = epair_init;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	/* We need to play some tricks here for the second interface. */
-	strlcpy(name, EPAIRNAME, len);
+	strlcpy(name, epairname, len);
 	error = if_clone_create(name, len, (caddr_t)scb);
 	if (error)
-		panic("%s: if_clone_createif() for our 2nd iface failed: %d",
+		panic("%s: if_clone_create() for our 2nd iface failed: %d",
 		    __func__, error);
 	scb->if_qflush = ifp->if_qflush;
 	ifp->if_qflush = epair_qflush;
 	ifp->if_transmit = epair_transmit;
-	ifp->if_baudrate = IF_Gbps(10UL);	/* arbitrary maximum */
+	if_initbaudrate(ifp, IF_Gbps(10));	/* arbitrary maximum */
 
 	/*
 	 * Restore name to <n>a as the ifp for this will go into the
@@ -861,14 +875,6 @@
 	strlcpy(name, sca->ifp->if_xname, len);
 	DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
 
-	/* Initialise pseudo media types. */
-	ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
-	ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
-	ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
-	ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
-	ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
-	ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
-
 	/* Tell the world, that we are ready to rock. */
 	sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -946,6 +952,25 @@
 	return (0);
 }
 
+static void
+vnet_epair_init(const void *unused __unused)
+{
+
+	V_epair_cloner = if_clone_advanced(epairname, 0,
+	    epair_clone_match, epair_clone_create, epair_clone_destroy);
+}
+VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_epair_init, NULL);
+
+static void
+vnet_epair_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_epair_cloner);
+}
+VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_epair_uninit, NULL);
+
 static int
 epair_modevent(module_t mod, int type, void *data)
 {
@@ -959,16 +984,14 @@
 		if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit))
 		    epair_nh.nh_qlimit = qlimit;
 		netisr_register(&epair_nh);
-		if_clone_attach(&epair_cloner);
 		if (bootverbose)
-			printf("%s initialized.\n", EPAIRNAME);
+			printf("%s initialized.\n", epairname);
 		break;
 	case MOD_UNLOAD:
-		if_clone_detach(&epair_cloner);
 		netisr_unregister(&epair_nh);
 		epair_dpcpu_detach();
 		if (bootverbose)
-			printf("%s unloaded.\n", EPAIRNAME);
+			printf("%s unloaded.\n", epairname);
 		break;
 	default:
 		return (EOPNOTSUPP);

Modified: trunk/sys/net/if_ethersubr.c
===================================================================
--- trunk/sys/net/if_ethersubr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_ethersubr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_ethersubr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_ethersubr.c 332160 2018-04-07 00:04:28Z brooks $
  */
 
 #include "opt_atalk.h"
@@ -40,6 +40,8 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -46,10 +48,10 @@
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/random.h>
-#include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
+#include <sys/uuid.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -63,9 +65,11 @@
 #include <net/if_bridgevar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
-#include <net/pf_mtag.h>
+#include <net/pfil.h>
 #include <net/vnet.h>
 
+#include <netpfil/pf/pf_mtag.h>
+
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
@@ -72,8 +76,6 @@
 #include <netinet/if_ether.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
@@ -86,7 +88,7 @@
 
 int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
 int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
-		struct sockaddr *dst, short *tp, int *hlen);
+		const struct sockaddr *dst, short *tp, int *hlen);
 
 #ifdef NETATALK
 #include <netatalk/at.h>
@@ -107,6 +109,8 @@
 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
 #endif
 
+VNET_DEFINE(struct pfil_head, link_pfil_hook);	/* Packet filter hooks */
+
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
@@ -142,13 +146,21 @@
 
 #define senderr(e) do { error = (e); goto bad;} while (0)
 
-#if defined(INET) || defined(INET6)
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared);
-static VNET_DEFINE(int, ether_ipfw);
-#define	V_ether_ipfw	VNET(ether_ipfw)
-#endif
+static void
+update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
+{
+	int csum_flags = 0;
 
+	if (src->m_pkthdr.csum_flags & CSUM_IP)
+		csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
+	if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+		csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+	if (src->m_pkthdr.csum_flags & CSUM_SCTP)
+		csum_flags |= CSUM_SCTP_VALID;
+	dst->m_pkthdr.csum_flags |= csum_flags;
+	if (csum_flags & CSUM_DATA_VALID)
+		dst->m_pkthdr.csum_data = 0xffff;
+}
 
 /*
  * Ethernet output routine.
@@ -158,7 +170,7 @@
  */
 int
 ether_output(struct ifnet *ifp, struct mbuf *m,
-	struct sockaddr *dst, struct route *ro)
+	const struct sockaddr *dst, struct route *ro)
 {
 	short type;
 	int error = 0, hdrcmplt = 0;
@@ -247,8 +259,8 @@
 			goto bad;
 		} else
 		    type = htons(ETHERTYPE_IPX);
-		bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
-		    (caddr_t)edst, sizeof (edst));
+		bcopy(&((const struct sockaddr_ipx *)dst)->sipx_addr.x_host,
+		    edst, sizeof (edst));
 		break;
 #endif
 #ifdef NETATALK
@@ -256,9 +268,9 @@
 	  {
 	    struct at_ifaddr *aa;
 
-	    if ((aa = at_ifawithnet((struct sockaddr_at *)dst)) == NULL)
+	    if ((aa = at_ifawithnet((const struct sockaddr_at *)dst)) == NULL)
 		    senderr(EHOSTUNREACH); /* XXX */
-	    if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) {
+	    if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) {
 		    ifa_free(&aa->aa_ifa);
 		    return (0);
 	    }
@@ -269,7 +281,7 @@
 		struct llc llc;
 
 		ifa_free(&aa->aa_ifa);
-		M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+		M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
 		if (m == NULL)
 			senderr(ENOBUFS);
 		llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
@@ -288,18 +300,21 @@
 #endif /* NETATALK */
 
 	case pseudo_AF_HDRCMPLT:
+	    {
+		const struct ether_header *eh;
+		
 		hdrcmplt = 1;
-		eh = (struct ether_header *)dst->sa_data;
+		eh = (const struct ether_header *)dst->sa_data;
 		(void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
 		/* FALLTHROUGH */
 
 	case AF_UNSPEC:
 		loop_copy = 0; /* if this is for us, don't do it */
-		eh = (struct ether_header *)dst->sa_data;
+		eh = (const struct ether_header *)dst->sa_data;
 		(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
 		type = eh->ether_type;
 		break;
-
+            }
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
@@ -306,15 +321,7 @@
 	}
 
 	if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
-		int csum_flags = 0;
-		if (m->m_pkthdr.csum_flags & CSUM_IP)
-			csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
-		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
-			csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
-		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
-			csum_flags |= CSUM_SCTP_VALID;
-		m->m_pkthdr.csum_flags |= csum_flags;
-		m->m_pkthdr.csum_data = 0xffff;
+		update_mbuf_csumflags(m, m);
 		return (if_simloop(ifp, m, dst->sa_family, 0));
 	}
 
@@ -322,7 +329,7 @@
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
-	M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 	if (m == NULL)
 		senderr(ENOBUFS);
 	eh = mtod(m, struct ether_header *);
@@ -347,15 +354,6 @@
 	 */
 	if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
 	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
-		int csum_flags = 0;
-
-		if (m->m_pkthdr.csum_flags & CSUM_IP)
-			csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
-		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
-			csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
-		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
-			csum_flags |= CSUM_SCTP_VALID;
-
 		if (m->m_flags & M_BCAST) {
 			struct mbuf *n;
 
@@ -371,18 +369,14 @@
 			 * often used kernel parts suffer from the same bug.
 			 * See PR kern/105943 for a proposed general solution.
 			 */
-			if ((n = m_dup(m, M_DONTWAIT)) != NULL) {
-				n->m_pkthdr.csum_flags |= csum_flags;
-				if (csum_flags & CSUM_DATA_VALID)
-					n->m_pkthdr.csum_data = 0xffff;
+			if ((n = m_dup(m, M_NOWAIT)) != NULL) {
+				update_mbuf_csumflags(m, n);
 				(void)if_simloop(ifp, n, dst->sa_family, hlen);
 			} else
 				ifp->if_iqdrops++;
 		} else if (bcmp(eh->ether_dhost, eh->ether_shost,
 				ETHER_ADDR_LEN) == 0) {
-			m->m_pkthdr.csum_flags |= csum_flags;
-			if (csum_flags & CSUM_DATA_VALID)
-				m->m_pkthdr.csum_data = 0xffff;
+			update_mbuf_csumflags(m, m);
 			(void) if_simloop(ifp, m, dst->sa_family, hlen);
 			return (0);	/* XXX */
 		}
@@ -398,7 +392,7 @@
 
 #if defined(INET) || defined(INET6)
 	if (ifp->if_carp &&
-	    (error = (*carp_output_p)(ifp, m, dst, NULL)))
+	    (error = (*carp_output_p)(ifp, m, dst)))
 		goto bad;
 #endif
 
@@ -428,18 +422,17 @@
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
-#if defined(INET) || defined(INET6)
+	int i;
 
-	if (V_ip_fw_chk_ptr && V_ether_ipfw != 0) {
-		if (ether_ipfw_chk(&m, ifp, 0) == 0) {
-			if (m) {
-				m_freem(m);
-				return EACCES;	/* pkt dropped */
-			} else
-				return 0;	/* consumed e.g. in a pipe */
-		}
+	if (PFIL_HOOKED(&V_link_pfil_hook)) {
+		i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL);
+
+		if (i != 0)
+			return (EACCES);
+
+		if (m == NULL)
+			return (0);
 	}
-#endif
 
 	/*
 	 * Queue message on interface, update output statistics if
@@ -449,113 +442,6 @@
 }
 
 #if defined(INET) || defined(INET6)
-/*
- * ipfw processing for ethernet packets (in and out).
- * The second parameter is NULL from ether_demux, and ifp from
- * ether_output_frame.
- */
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
-{
-	struct ether_header *eh;
-	struct ether_header save_eh;
-	struct mbuf *m;
-	int i;
-	struct ip_fw_args args;
-	struct m_tag *mtag;
-
-	/* fetch start point from rule, if any */
-	mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
-	if (mtag == NULL) {
-		args.rule.slot = 0;
-	} else {
-		/* dummynet packet, already partially processed */
-		struct ipfw_rule_ref *r;
-
-		/* XXX can we free it after use ? */
-		mtag->m_tag_id = PACKET_TAG_NONE;
-		r = (struct ipfw_rule_ref *)(mtag + 1);
-		if (r->info & IPFW_ONEPASS)
-			return (1);
-		args.rule = *r;
-	}
-
-	/*
-	 * I need some amt of data to be contiguous, and in case others need
-	 * the packet (shared==1) also better be in the first mbuf.
-	 */
-	m = *m0;
-	i = min( m->m_pkthdr.len, max_protohdr);
-	if ( shared || m->m_len < i) {
-		m = m_pullup(m, i);
-		if (m == NULL) {
-			*m0 = m;
-			return 0;
-		}
-	}
-	eh = mtod(m, struct ether_header *);
-	save_eh = *eh;			/* save copy for restore below */
-	m_adj(m, ETHER_HDR_LEN);	/* strip ethernet header */
-
-	args.m = m;		/* the packet we are looking at		*/
-	args.oif = dst;		/* destination, if any			*/
-	args.next_hop = NULL;	/* we do not support forward yet	*/
-	args.next_hop6 = NULL;	/* we do not support forward yet	*/
-	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
-	args.inp = NULL;	/* used by ipfw uid/gid/jail rules	*/
-	i = V_ip_fw_chk_ptr(&args);
-	m = args.m;
-	if (m != NULL) {
-		/*
-		 * Restore Ethernet header, as needed, in case the
-		 * mbuf chain was replaced by ipfw.
-		 */
-		M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
-		if (m == NULL) {
-			*m0 = m;
-			return 0;
-		}
-		if (eh != mtod(m, struct ether_header *))
-			bcopy(&save_eh, mtod(m, struct ether_header *),
-				ETHER_HDR_LEN);
-	}
-	*m0 = m;
-
-	if (i == IP_FW_DENY) /* drop */
-		return 0;
-
-	KASSERT(m != NULL, ("ether_ipfw_chk: m is NULL"));
-
-	if (i == IP_FW_PASS) /* a PASS rule.  */
-		return 1;
-
-	if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
-		int dir;
-		/*
-		 * Pass the pkt to dummynet, which consumes it.
-		 * If shared, make a copy and keep the original.
-		 */
-		if (shared) {
-			m = m_copypacket(m, M_DONTWAIT);
-			if (m == NULL)
-				return 0;
-		} else {
-			/*
-			 * Pass the original to dummynet and
-			 * nothing back to the caller
-			 */
-			*m0 = NULL ;
-		}
-		dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
-		ip_dn_io_ptr(&m, dir, &args);
-		return 0;
-	}
-	/*
-	 * XXX at some point add support for divert/forward actions.
-	 * If none of the above matches, we have to drop the pkt.
-	 */
-	return 0;
-}
 #endif
 
 /*
@@ -646,7 +532,8 @@
 		m->m_flags &= ~M_HASFCS;
 	}
 
-	ifp->if_ibytes += m->m_pkthdr.len;
+	if (!(ifp->if_capenable & IFCAP_HWSTATS))
+		ifp->if_ibytes += m->m_pkthdr.len;
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
@@ -695,6 +582,7 @@
 		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
 		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 		m_adj(m, ETHER_VLAN_ENCAP_LEN);
+		eh = mtod(m, struct ether_header *);
 	}
 
 	M_SETFIB(m, ifp->if_fib);
@@ -709,6 +597,7 @@
 			CURVNET_RESTORE();
 			return;
 		}
+		eh = mtod(m, struct ether_header *);
 	}
 
 	/*
@@ -723,6 +612,7 @@
 			CURVNET_RESTORE();
 			return;
 		}
+		eh = mtod(m, struct ether_header *);
 	}
 
 #if defined(INET) || defined(INET6)
@@ -753,9 +643,8 @@
 			m->m_flags |= M_PROMISC;
 	}
 
-	/* First chunk of an mbuf contains good entropy */
 	if (harvest.ethernet)
-		random_harvest(m, 16, 3, 0, RANDOM_NET);
+		random_harvest(&(m->m_data), 12, 2, RANDOM_NET_ETHER);
 
 	ether_demux(ifp, m);
 	CURVNET_RESTORE();
@@ -789,6 +678,35 @@
 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
 
 static void
+vnet_ether_init(__unused void *arg)
+{
+	int i;
+
+	/* Initialize packet filter hooks. */
+	V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
+	V_link_pfil_hook.ph_af = AF_LINK;
+	if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
+		printf("%s: WARNING: unable to register pfil link hook, "
+			"error %d\n", __func__, i);
+}
+VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+    vnet_ether_init, NULL);
+ 
+static void
+vnet_ether_destroy(__unused void *arg)
+{
+	int i;
+
+	if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
+		printf("%s: WARNING: unable to unregister pfil link hook, "
+			"error %d\n", __func__, i);
+}
+VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+    vnet_ether_destroy, NULL);
+
+
+
+static void
 ether_input(struct ifnet *ifp, struct mbuf *m)
 {
 
@@ -808,7 +726,7 @@
 ether_demux(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
-	int isr;
+	int i, isr;
 	u_short ether_type;
 #if defined(NETATALK)
 	struct llc *l;
@@ -816,19 +734,14 @@
 
 	KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
 
-#if defined(INET) || defined(INET6)
-	/*
-	 * Allow dummynet and/or ipfw to claim the frame.
-	 * Do not do this for PROMISC frames in case we are re-entered.
-	 */
-	if (V_ip_fw_chk_ptr && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) {
-		if (ether_ipfw_chk(&m, NULL, 0) == 0) {
-			if (m)
-				m_freem(m);	/* dropped; free mbuf chain */
-			return;			/* consumed */
-		}
+	/* Do not grab PROMISC frames in case we are re-entered. */
+	if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
+		i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL);
+
+		if (i != 0 || m == NULL)
+			return;
 	}
-#endif
+
 	eh = mtod(m, struct ether_header *);
 	ether_type = ntohs(eh->ether_type);
 
@@ -865,7 +778,7 @@
 	 * Strip off Ethernet header.
 	 */
 	m->m_flags &= ~M_VLANTAG;
-	m->m_flags &= ~(M_PROTOFLAGS);
+	m_clrprotoflags(m);
 	m_adj(m, ETHER_HDR_LEN);
 
 	/*
@@ -954,7 +867,7 @@
 		 * Put back the ethernet header so netgraph has a
 		 * consistent view of inbound packets.
 		 */
-		M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 		(*ng_ether_input_orphan_p)(ifp, m);
 		return;
 	}
@@ -1008,6 +921,9 @@
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
+	if (ifp->if_hw_addr != NULL)
+		bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen);
+
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 	if (ng_ether_attach_p != NULL)
 		(*ng_ether_attach_p)(ifp);
@@ -1018,6 +934,13 @@
 			break; 
 	if (i != ifp->if_addrlen)
 		if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
+
+	uuid_ether_add(LLADDR(sdl));
+
+	/* Add necessary bits are setup; announce it now. */
+	EVENTHANDLER_INVOKE(ether_ifattach_event, ifp);
+	if (IS_DEFAULT_VNET(curvnet))
+		devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL);
 }
 
 /*
@@ -1026,6 +949,11 @@
 void
 ether_ifdetach(struct ifnet *ifp)
 {
+	struct sockaddr_dl *sdl;
+
+	sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
+	uuid_ether_del(LLADDR(sdl));
+
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
@@ -1057,10 +985,6 @@
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
-#if defined(INET) || defined(INET6)
-SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
-	     &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");
-#endif
 
 #if 0
 /*
@@ -1186,13 +1110,8 @@
 		break;
 
 	case SIOCGIFADDR:
-		{
-			struct sockaddr *sa;
-
-			sa = (struct sockaddr *) & ifr->ifr_data;
-			bcopy(IF_LLADDR(ifp),
-			      (caddr_t) sa->sa_data, ETHER_ADDR_LEN);
-		}
+		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+		    ETHER_ADDR_LEN);
 		break;
 
 	case SIOCSIFMTU:
@@ -1382,7 +1301,7 @@
 {
 	struct ether_vlan_header *evl;
 
-	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
+	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 	if (m == NULL)
 		return (NULL);
 	/* M_PREPEND takes care of m_len, m_pkthdr.len for us */

Modified: trunk/sys/net/if_faith.c
===================================================================
--- trunk/sys/net/if_faith.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_faith.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_faith.c 232292 2012-02-29 09:47:26Z bz $
+ * $FreeBSD: stable/10/sys/net/if_faith.c 263478 2014-03-21 15:15:30Z glebius $
  */
 /*
  * derived from
@@ -80,14 +80,12 @@
 #include <netinet6/ip6_var.h>
 #endif
 
-#define FAITHNAME	"faith"
-
 struct faith_softc {
 	struct ifnet *sc_ifp;
 };
 
 static int faithioctl(struct ifnet *, u_long, caddr_t);
-int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int faithoutput(struct ifnet *, struct mbuf *, const struct sockaddr *,
 	struct route *);
 static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *);
 #ifdef INET6
@@ -96,13 +94,13 @@
 
 static int faithmodevent(module_t, int, void *);
 
-static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface");
+static const char faithname[] = "faith";
+static MALLOC_DEFINE(M_FAITH, faithname, "Firewall Assisted Tunnel Interface");
 
 static int	faith_clone_create(struct if_clone *, int, caddr_t);
 static void	faith_clone_destroy(struct ifnet *);
+static struct if_clone *faith_cloner;
 
-IFC_SIMPLE_DECLARE(faith, 0);
-
 #define	FAITHMTU	1500
 
 static int
@@ -114,8 +112,8 @@
 
 	switch (type) {
 	case MOD_LOAD:
-		if_clone_attach(&faith_cloner);
-
+		faith_cloner = if_clone_simple(faithname, faith_clone_create,
+		    faith_clone_destroy, 0);
 #ifdef INET6
 		faithprefix_p = faithprefix;
 #endif
@@ -126,7 +124,7 @@
 		faithprefix_p = NULL;
 #endif
 
-		if_clone_detach(&faith_cloner);
+		if_clone_detach(faith_cloner);
 		break;
 	default:
 		return EOPNOTSUPP;
@@ -160,7 +158,7 @@
 	}
 
 	ifp->if_softc = sc;
-	if_initname(sc->sc_ifp, ifc->ifc_name, unit);
+	if_initname(sc->sc_ifp, faithname, unit);
 
 	ifp->if_mtu = FAITHMTU;
 	/* Change to BROADCAST experimentaly to announce its prefix. */
@@ -187,12 +185,9 @@
 	free(sc, M_FAITH);
 }
 
-int
-faithoutput(ifp, m, dst, ro)
-	struct ifnet *ifp;
-	struct mbuf *m;
-	struct sockaddr *dst;
-	struct route *ro;
+static int
+faithoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+	struct route *ro)
 {
 	int isr;
 	u_int32_t af;
@@ -203,15 +198,13 @@
 	if (ro != NULL)
 		rt = ro->ro_rt;
 	/* BPF writes need to be handled specially. */
-	if (dst->sa_family == AF_UNSPEC) {
+	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af;
-	}
+	else
+		af = dst->sa_family;
 
-	if (bpf_peers_present(ifp->if_bpf)) {
-		af = dst->sa_family;
+	if (bpf_peers_present(ifp->if_bpf))
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
-	}
 
 	if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		m_freem(m);
@@ -220,7 +213,7 @@
 	}
 	ifp->if_opackets++;
 	ifp->if_obytes += m->m_pkthdr.len;
-	switch (dst->sa_family) {
+	switch (af) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
@@ -253,7 +246,7 @@
 	struct rt_addrinfo *info;
 {
 	RT_LOCK_ASSERT(rt);
-	rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+	rt->rt_mtu = rt->rt_ifp->if_mtu;
 }
 
 /*

Modified: trunk/sys/net/if_fddisubr.c
===================================================================
--- trunk/sys/net/if_fddisubr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_fddisubr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp
- * $FreeBSD: stable/9/sys/net/if_fddisubr.c 223741 2011-07-03 16:08:38Z bz $
+ * $FreeBSD: stable/10/sys/net/if_fddisubr.c 332160 2018-04-07 00:04:28Z brooks $
  */
 
 #include "opt_atalk.h"
@@ -97,7 +97,7 @@
 
 static int fddi_resolvemulti(struct ifnet *, struct sockaddr **,
 			      struct sockaddr *);
-static int fddi_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
 		       struct route *); 
 static void fddi_input(struct ifnet *ifp, struct mbuf *m);
 
@@ -111,11 +111,8 @@
  * Assumes that ifp is actually pointer to arpcom structure.
  */
 static int
-fddi_output(ifp, m, dst, ro)
-	struct ifnet *ifp;
-	struct mbuf *m;
-	struct sockaddr *dst;
-	struct route *ro;
+fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+	struct route *ro)
 {
 	u_int16_t type;
 	int loop_copy = 0, error = 0, hdrcmplt = 0;
@@ -190,19 +187,19 @@
 #ifdef IPX
 	case AF_IPX:
 		type = htons(ETHERTYPE_IPX);
- 		bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
-		    (caddr_t)edst, FDDI_ADDR_LEN);
+ 		bcopy(&((const struct sockaddr_ipx *)dst)->sipx_addr.x_host,
+		    edst, FDDI_ADDR_LEN);
 		break;
 #endif /* IPX */
 #ifdef NETATALK
 	case AF_APPLETALK: {
 	    struct at_ifaddr *aa;
-            if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst))
+            if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst))
                 return (0);
 	    /*
 	     * ifaddr is the first thing in at_ifaddr
 	     */
-	    if ((aa = at_ifawithnet( (struct sockaddr_at *)dst)) == 0)
+	    if ((aa = at_ifawithnet((const struct sockaddr_at *)dst)) == 0)
 		goto bad;
 	    
 	    /*
@@ -213,7 +210,7 @@
 	    if (aa->aa_flags & AFA_PHASE2) {
 		struct llc llc;
 
-		M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAIT);
+		M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAITOK);
 		llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
 		llc.llc_control = LLC_UI;
 		bcopy(at_org_code, llc.llc_snap.org_code, sizeof(at_org_code));
@@ -230,19 +227,21 @@
 
 	case pseudo_AF_HDRCMPLT:
 	{
-		struct ether_header *eh;
+		const struct ether_header *eh;
+
 		hdrcmplt = 1;
-		eh = (struct ether_header *)dst->sa_data;
-		bcopy((caddr_t)eh->ether_shost, (caddr_t)esrc, FDDI_ADDR_LEN);
+		eh = (const struct ether_header *)dst->sa_data;
+		bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN);
 		/* FALLTHROUGH */
 	}
 
 	case AF_UNSPEC:
 	{
-		struct ether_header *eh;
+		const struct ether_header *eh;
+
 		loop_copy = -1;
-		eh = (struct ether_header *)dst->sa_data;
-		bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, FDDI_ADDR_LEN);
+		eh = (const struct ether_header *)dst->sa_data;
+		bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN);
 		if (*edst & 1)
 			m->m_flags |= (M_BCAST|M_MCAST);
 		type = eh->ether_type;
@@ -292,7 +291,7 @@
 	 */
 	if (type != 0) {
 		struct llc *l;
-		M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+		M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
 		if (m == 0)
 			senderr(ENOBUFS);
 		l = mtod(m, struct llc *);
@@ -308,7 +307,7 @@
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
-	M_PREPEND(m, FDDI_HDR_LEN, M_DONTWAIT);
+	M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT);
 	if (m == 0)
 		senderr(ENOBUFS);
 	fh = mtod(m, struct fddi_header *);
@@ -392,7 +391,6 @@
 		goto dropanyway;
 	}
 	fh = mtod(m, struct fddi_header *);
-	m->m_pkthdr.header = (void *)fh;
 
 	/*
 	 * Discard packet if interface is not up.
@@ -672,14 +670,9 @@
 			break;
 		}
 		break;
-	case SIOCGIFADDR: {
-			struct sockaddr *sa;
-
-			sa = (struct sockaddr *) & ifr->ifr_data;
-			bcopy(IF_LLADDR(ifp),
-			      (caddr_t) sa->sa_data, FDDI_ADDR_LEN);
-
-		}
+	case SIOCGIFADDR:
+		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+		    FDDI_ADDR_LEN);
 		break;
 	case SIOCSIFMTU:
 		/*

Modified: trunk/sys/net/if_fwsubr.c
===================================================================
--- trunk/sys/net/if_fwsubr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_fwsubr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_fwsubr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_fwsubr.c 332160 2018-04-07 00:04:28Z brooks $
  */
 
 #include "opt_inet.h"
@@ -76,7 +76,7 @@
 };
 
 static int
-firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
@@ -230,7 +230,7 @@
 		/*
 		 * No fragmentation is necessary.
 		 */
-		M_PREPEND(m, sizeof(uint32_t), M_DONTWAIT);
+		M_PREPEND(m, sizeof(uint32_t), M_NOWAIT);
 		if (!m) {
 			error = ENOBUFS;
 			goto bad;
@@ -262,7 +262,7 @@
 				 * Split off the tail segment from the
 				 * datagram, copying our tags over.
 				 */
-				mtail = m_split(m, fsize, M_DONTWAIT);
+				mtail = m_split(m, fsize, M_NOWAIT);
 				m_tag_copy_chain(mtail, m, M_NOWAIT);
 			} else {
 				mtail = 0;
@@ -272,7 +272,7 @@
 			 * Add our encapsulation header to this
 			 * fragment and hand it off to the link.
 			 */
-			M_PREPEND(m, 2*sizeof(uint32_t), M_DONTWAIT);
+			M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT);
 			if (!m) {
 				error = ENOBUFS;
 				goto bad;
@@ -657,13 +657,8 @@
 		break;
 
 	case SIOCGIFADDR:
-		{
-			struct sockaddr *sa;
-
-			sa = (struct sockaddr *) & ifr->ifr_data;
-			bcopy(&IFP2FWC(ifp)->fc_hwaddr,
-			    (caddr_t) sa->sa_data, sizeof(struct fw_hwaddr));
-		}
+		bcopy(&IFP2FWC(ifp)->fc_hwaddr, &ifr->ifr_addr.sa_data[0],
+		    sizeof(struct fw_hwaddr));
 		break;
 
 	case SIOCSIFMTU:

Modified: trunk/sys/net/if_gif.c
===================================================================
--- trunk/sys/net/if_gif.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gif.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,7 +1,4 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/net/if_gif.c 248085 2013-03-09 02:36:32Z marius $	*/
-/*	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $	*/
-
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
@@ -29,8 +26,13 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_gif.c 293411 2016-01-08 02:59:56Z araujo $");
+
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
@@ -38,11 +40,14 @@
 #include <sys/systm.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
+#include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
+#include <sys/sx.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
@@ -54,6 +59,7 @@
 #include <machine/cpu.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
@@ -64,9 +70,9 @@
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
+#include <netinet/ip_ecn.h>
 #ifdef	INET
 #include <netinet/in_var.h>
-#include <netinet/in_gif.h>
 #include <netinet/ip_var.h>
 #endif	/* INET */
 
@@ -76,9 +82,9 @@
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
+#include <netinet6/ip6_ecn.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
-#include <netinet6/in6_gif.h>
 #include <netinet6/ip6protosw.h>
 #endif /* INET6 */
 
@@ -89,27 +95,42 @@
 
 #include <security/mac/mac_framework.h>
 
-#define GIFNAME		"gif"
+static const char gifname[] = "gif";
 
 /*
- * gif_mtx protects the global gif_softc_list.
+ * gif_mtx protects a per-vnet gif_softc_list.
  */
-static struct mtx gif_mtx;
+static VNET_DEFINE(struct mtx, gif_mtx);
+#define	V_gif_mtx		VNET(gif_mtx)
 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
 static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
 #define	V_gif_softc_list	VNET(gif_softc_list)
+static struct sx gif_ioctl_sx;
+SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
 
+#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
+					    NULL, MTX_DEF)
+#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
+#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
+#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)
+
 void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
 void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
 void	(*ng_gif_attach_p)(struct ifnet *ifp);
 void	(*ng_gif_detach_p)(struct ifnet *ifp);
 
-static void	gif_start(struct ifnet *);
+static int	gif_check_nesting(struct ifnet *, struct mbuf *);
+static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
+    struct sockaddr *);
+static void	gif_delete_tunnel(struct ifnet *);
+static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
+static int	gif_transmit(struct ifnet *, struct mbuf *);
+static void	gif_qflush(struct ifnet *);
 static int	gif_clone_create(struct if_clone *, int, caddr_t);
 static void	gif_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, gif_cloner);
+#define	V_gif_cloner	VNET(gif_cloner)
 
-IFC_SIMPLE_DECLARE(gif, 0);
-
 static int gifmodevent(module_t, int, void *);
 
 SYSCTL_DECL(_net_link);
@@ -154,10 +175,7 @@
 #endif
 
 static int
-gif_clone_create(ifc, unit, params)
-	struct if_clone *ifc;
-	int unit;
-	caddr_t params;
+gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct gif_softc *sc;
 
@@ -164,19 +182,10 @@
 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
 	GIF2IFP(sc) = if_alloc(IFT_GIF);
-	if (GIF2IFP(sc) == NULL) {
-		free(sc, M_GIF);
-		return (ENOSPC);
-	}
-
 	GIF_LOCK_INIT(sc);
-
 	GIF2IFP(sc)->if_softc = sc;
-	if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
+	if_initname(GIF2IFP(sc), gifname, unit);
 
-	sc->encap_cookie4 = sc->encap_cookie6 = NULL;
-	sc->gif_options = GIF_ACCEPT_REVETHIP;
-
 	GIF2IFP(sc)->if_addrlen = 0;
 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
@@ -185,56 +194,42 @@
 	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
 #endif
 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
-	GIF2IFP(sc)->if_start  = gif_start;
+	GIF2IFP(sc)->if_transmit  = gif_transmit;
+	GIF2IFP(sc)->if_qflush  = gif_qflush;
 	GIF2IFP(sc)->if_output = gif_output;
-	GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
+	GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+	GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
 	if_attach(GIF2IFP(sc));
 	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
 	if (ng_gif_attach_p != NULL)
 		(*ng_gif_attach_p)(GIF2IFP(sc));
 
-	mtx_lock(&gif_mtx);
+	GIF_LIST_LOCK();
 	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
-	mtx_unlock(&gif_mtx);
-
+	GIF_LIST_UNLOCK();
 	return (0);
 }
 
 static void
-gif_clone_destroy(ifp)
-	struct ifnet *ifp;
+gif_clone_destroy(struct ifnet *ifp)
 {
-#if defined(INET) || defined(INET6)
-	int err;
-#endif
-	struct gif_softc *sc = ifp->if_softc;
+	struct gif_softc *sc;
 
-	mtx_lock(&gif_mtx);
+	sx_xlock(&gif_ioctl_sx);
+	sc = ifp->if_softc;
+	gif_delete_tunnel(ifp);
+	GIF_LIST_LOCK();
 	LIST_REMOVE(sc, gif_list);
-	mtx_unlock(&gif_mtx);
-
-	gif_delete_tunnel(ifp);
-#ifdef INET6
-	if (sc->encap_cookie6 != NULL) {
-		err = encap_detach(sc->encap_cookie6);
-		KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
-	}
-#endif
-#ifdef INET
-	if (sc->encap_cookie4 != NULL) {
-		err = encap_detach(sc->encap_cookie4);
-		KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
-	}
-#endif
-
+	GIF_LIST_UNLOCK();
 	if (ng_gif_detach_p != NULL)
 		(*ng_gif_detach_p)(ifp);
 	bpfdetach(ifp);
 	if_detach(ifp);
+	ifp->if_softc = NULL;
+	sx_xunlock(&gif_ioctl_sx);
+
 	if_free(ifp);
-
 	GIF_LOCK_DESTROY(sc);
-
 	free(sc, M_GIF);
 }
 
@@ -243,31 +238,35 @@
 {
 
 	LIST_INIT(&V_gif_softc_list);
+	GIF_LIST_LOCK_INIT();
+	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
+	    gif_clone_destroy, 0);
 }
-VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init,
-    NULL);
+VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_gif_init, NULL);
 
+static void
+vnet_gif_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_gif_cloner);
+	GIF_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_gif_uninit, NULL);
+
 static int
-gifmodevent(mod, type, data)
-	module_t mod;
-	int type;
-	void *data;
+gifmodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
-		mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
-		if_clone_attach(&gif_cloner);
-		break;
-
 	case MOD_UNLOAD:
-		if_clone_detach(&gif_cloner);
-		mtx_destroy(&gif_mtx);
 		break;
 	default:
-		return EOPNOTSUPP;
+		return (EOPNOTSUPP);
 	}
-	return 0;
+	return (0);
 }
 
 static moduledata_t gif_mod = {
@@ -280,113 +279,192 @@
 MODULE_VERSION(if_gif, 1);
 
 int
-gif_encapcheck(m, off, proto, arg)
-	const struct mbuf *m;
-	int off;
-	int proto;
-	void *arg;
+gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
-	struct ip ip;
+	GIF_RLOCK_TRACKER;
 	struct gif_softc *sc;
+	int ret;
+	uint8_t ver;
 
 	sc = (struct gif_softc *)arg;
-	if (sc == NULL)
-		return 0;
+	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
+		return (0);
 
-	if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
-		return 0;
+	ret = 0;
+	GIF_RLOCK(sc);
 
 	/* no physical address */
-	if (!sc->gif_psrc || !sc->gif_pdst)
-		return 0;
+	if (sc->gif_family == 0)
+		goto done;
 
 	switch (proto) {
 #ifdef INET
 	case IPPROTO_IPV4:
-		break;
 #endif
 #ifdef INET6
 	case IPPROTO_IPV6:
-		break;
 #endif
 	case IPPROTO_ETHERIP:
 		break;
-
 	default:
-		return 0;
+		goto done;
 	}
 
 	/* Bail on short packets */
-	if (m->m_pkthdr.len < sizeof(ip))
-		return 0;
+	if (m->m_pkthdr.len < sizeof(struct ip))
+		goto done;
 
-	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
-
-	switch (ip.ip_v) {
+	m_copydata(m, 0, 1, &ver);
+	switch (ver >> 4) {
 #ifdef INET
 	case 4:
-		if (sc->gif_psrc->sa_family != AF_INET ||
-		    sc->gif_pdst->sa_family != AF_INET)
-			return 0;
-		return gif_encapcheck4(m, off, proto, arg);
+		if (sc->gif_family != AF_INET)
+			goto done;
+		ret = in_gif_encapcheck(m, off, proto, arg);
+		break;
 #endif
 #ifdef INET6
 	case 6:
 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
-			return 0;
-		if (sc->gif_psrc->sa_family != AF_INET6 ||
-		    sc->gif_pdst->sa_family != AF_INET6)
-			return 0;
-		return gif_encapcheck6(m, off, proto, arg);
+			goto done;
+		if (sc->gif_family != AF_INET6)
+			goto done;
+		ret = in6_gif_encapcheck(m, off, proto, arg);
+		break;
 #endif
-	default:
-		return 0;
 	}
+done:
+	GIF_RUNLOCK(sc);
+	return (ret);
 }
 
-static void
-gif_start(struct ifnet *ifp)
+static int
+gif_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct gif_softc *sc;
-	struct mbuf *m;
+	struct etherip_header *eth;
+#ifdef INET
+	struct ip *ip;
+#endif
+#ifdef INET6
+	struct ip6_hdr *ip6;
+	uint32_t t;
+#endif
+	uint32_t af;
+	uint8_t proto, ecn;
+	int error;
 
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m);
+	if (error) {
+		m_freem(m);
+		goto err;
+	}
+#endif
+	error = ENETDOWN;
 	sc = ifp->if_softc;
+	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+	    (ifp->if_flags & IFF_UP) == 0 ||
+	    sc->gif_family == 0 ||
+	    (error = gif_check_nesting(ifp, m)) != 0) {
+		m_freem(m);
+		goto err;
+	}
+	/* Now pull back the af that we stashed in the csum_data. */
+	if (ifp->if_bridge)
+		af = AF_LINK;
+	else
+		af = m->m_pkthdr.csum_data;
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	M_SETFIB(m, sc->gif_fibnum);
+	BPF_MTAP2(ifp, &af, sizeof(af), m);
+	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
+	/* inner AF-specific encapsulation */
+	ecn = 0;
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		proto = IPPROTO_IPV4;
+		if (m->m_len < sizeof(struct ip))
+			m = m_pullup(m, sizeof(struct ip));
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto err;
+		}
+		ip = mtod(m, struct ip *);
+		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+		    ECN_NOCARE, &ecn, &ip->ip_tos);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		proto = IPPROTO_IPV6;
+		if (m->m_len < sizeof(struct ip6_hdr))
+			m = m_pullup(m, sizeof(struct ip6_hdr));
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto err;
+		}
+		t = 0;
+		ip6 = mtod(m, struct ip6_hdr *);
+		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+		    ECN_NOCARE, &t, &ip6->ip6_flow);
+		ecn = (ntohl(t) >> 20) & 0xff;
+		break;
+#endif
+	case AF_LINK:
+		proto = IPPROTO_ETHERIP;
+		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto err;
+		}
+		eth = mtod(m, struct etherip_header *);
+		eth->eip_resvh = 0;
+		eth->eip_ver = ETHERIP_VERSION;
+		eth->eip_resvl = 0;
+		break;
+	default:
+		error = EAFNOSUPPORT;
+		m_freem(m);
+		goto err;
+	}
+	/* XXX should we check if our outer source is legal? */
+	/* dispatch to output logic based on outer AF */
+	switch (sc->gif_family) {
+#ifdef INET
+	case AF_INET:
+		error = in_gif_output(ifp, m, proto, ecn);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		error = in6_gif_output(ifp, m, proto, ecn);
+		break;
+#endif
+	default:
+		m_freem(m);
+	}
+err:
+	if (error)
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+	return (error);
+}
 
-	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-	for (;;) {
-		IFQ_DEQUEUE(&ifp->if_snd, m);
-		if (m == 0)
-			break;
+static void
+gif_qflush(struct ifnet *ifp __unused)
+{
 
-		gif_output(ifp, m, sc->gif_pdst, NULL);
-
-	}
-	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-
-	return;
 }
 
-int
-gif_output(ifp, m, dst, ro)
-	struct ifnet *ifp;
-	struct mbuf *m;
-	struct sockaddr *dst;
-	struct route *ro;
+#define	MTAG_GIF	1080679712
+static int
+gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
 {
-	struct gif_softc *sc = ifp->if_softc;
 	struct m_tag *mtag;
-	int error = 0;
-	int gif_called;
-	u_int32_t af;
+	int count;
 
-#ifdef MAC
-	error = mac_ifnet_check_transmit(ifp, m);
-	if (error) {
-		m_freem(m);
-		goto end;
-	}
-#endif
-
 	/*
 	 * gif may cause infinite recursion calls when misconfigured.
 	 * We'll prevent this by detecting loops.
@@ -394,105 +472,63 @@
 	 * High nesting level may cause stack exhaustion.
 	 * We'll prevent this by introducing upper limit.
 	 */
-	gif_called = 1;
-	mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
-	while (mtag != NULL) {
+	count = 1;
+	mtag = NULL;
+	while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
 		if (*(struct ifnet **)(mtag + 1) == ifp) {
-			log(LOG_NOTICE,
-			    "gif_output: loop detected on %s\n",
-			    (*(struct ifnet **)(mtag + 1))->if_xname);
-			m_freem(m);
-			error = EIO;	/* is there better errno? */
-			goto end;
+			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+			return (EIO);
 		}
-		mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
-		gif_called++;
+		count++;
 	}
-	if (gif_called > V_max_gif_nesting) {
+	if (count > V_max_gif_nesting) {
 		log(LOG_NOTICE,
-		    "gif_output: recursively called too many times(%d)\n",
-		    gif_called);
-		m_freem(m);
-		error = EIO;	/* is there better errno? */
-		goto end;
+		    "%s: if_output recursively called too many times(%d)\n",
+		    if_name(ifp), count);
+		return (EIO);
 	}
-	mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
-	    M_NOWAIT);
-	if (mtag == NULL) {
-		m_freem(m);
-		error = ENOMEM;
-		goto end;
-	}
+	mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
+	if (mtag == NULL)
+		return (ENOMEM);
 	*(struct ifnet **)(mtag + 1) = ifp;
 	m_tag_prepend(m, mtag);
+	return (0);
+}
 
-	m->m_flags &= ~(M_BCAST|M_MCAST);
+int
+gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+	struct route *ro)
+{
+	uint32_t af;
 
-	GIF_LOCK(sc);
-
-	if (!(ifp->if_flags & IFF_UP) ||
-	    sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
-		GIF_UNLOCK(sc);
-		m_freem(m);
-		error = ENETDOWN;
-		goto end;
-	}
-
-	/* BPF writes need to be handled specially. */
-	if (dst->sa_family == AF_UNSPEC) {
+	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af;
-	}
+	else
+		af = dst->sa_family;
+	/*
+	 * Now save the af in the inbound pkt csum data, this is a cheat since
+	 * we are using the inbound csum_data field to carry the af over to
+	 * the gif_transmit() routine, avoiding using yet another mtag.
+	 */
+	m->m_pkthdr.csum_data = af;
+	return (ifp->if_transmit(ifp, m));
+}
 
-	af = dst->sa_family;
-	BPF_MTAP2(ifp, &af, sizeof(af), m);
-	ifp->if_opackets++;	
-	ifp->if_obytes += m->m_pkthdr.len;
-
-	/* override to IPPROTO_ETHERIP for bridged traffic */
-	if (ifp->if_bridge)
-		af = AF_LINK;
-
-	M_SETFIB(m, sc->gif_fibnum);
-	/* inner AF-specific encapsulation */
-
-	/* XXX should we check if our outer source is legal? */
-
-	/* dispatch to output logic based on outer AF */
-	switch (sc->gif_psrc->sa_family) {
+void
+gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
+{
+	struct etherip_header *eip;
 #ifdef INET
-	case AF_INET:
-		error = in_gif_output(ifp, af, m);
-		break;
+	struct ip *ip;
 #endif
 #ifdef INET6
-	case AF_INET6:
-		error = in6_gif_output(ifp, af, m);
-		break;
+	struct ip6_hdr *ip6;
+	uint32_t t;
 #endif
-	default:
-		m_freem(m);		
-		error = ENETDOWN;
-	}
-
-	GIF_UNLOCK(sc);
-  end:
-	if (error)
-		ifp->if_oerrors++;
-	return (error);
-}
-
-void
-gif_input(m, af, ifp)
-	struct mbuf *m;
-	int af;
-	struct ifnet *ifp;
-{
-	int isr, n;
 	struct gif_softc *sc;
-	struct etherip_header *eip;
 	struct ether_header *eh;
 	struct ifnet *oldifp;
+	int isr, n, af;
 
 	if (ifp == NULL) {
 		/* just in case */
@@ -501,6 +537,46 @@
 	}
 	sc = ifp->if_softc;
 	m->m_pkthdr.rcvif = ifp;
+	m_clrprotoflags(m);
+	switch (proto) {
+#ifdef INET
+	case IPPROTO_IPV4:
+		af = AF_INET;
+		if (m->m_len < sizeof(struct ip))
+			m = m_pullup(m, sizeof(struct ip));
+		if (m == NULL)
+			goto drop;
+		ip = mtod(m, struct ip *);
+		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
+			m_freem(m);
+			goto drop;
+		}
+		break;
+#endif
+#ifdef INET6
+	case IPPROTO_IPV6:
+		af = AF_INET6;
+		if (m->m_len < sizeof(struct ip6_hdr))
+			m = m_pullup(m, sizeof(struct ip6_hdr));
+		if (m == NULL)
+			goto drop;
+		t = htonl((uint32_t)ecn << 20);
+		ip6 = mtod(m, struct ip6_hdr *);
+		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
+			m_freem(m);
+			goto drop;
+		}
+		break;
+#endif
+	case IPPROTO_ETHERIP:
+		af = AF_LINK;
+		break;
+	default:
+		m_freem(m);
+		goto drop;
+	}
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
@@ -507,14 +583,21 @@
 #endif
 
 	if (bpf_peers_present(ifp->if_bpf)) {
-		u_int32_t af1 = af;
+		uint32_t af1 = af;
 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
 	}
 
+	if ((ifp->if_flags & IFF_MONITOR) != 0) {
+		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+		m_freem(m);
+		return;
+	}
+
 	if (ng_gif_input_p != NULL) {
 		(*ng_gif_input_p)(ifp, &m, af);
 		if (m == NULL)
-			return;
+			goto drop;
 	}
 
 	/*
@@ -541,34 +624,15 @@
 #endif
 	case AF_LINK:
 		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
-		if (n > m->m_len) {
+		if (n > m->m_len)
 			m = m_pullup(m, n);
-			if (m == NULL) {
-				ifp->if_ierrors++;
-				return;
-			}
-		}
-
+		if (m == NULL)
+			goto drop;
 		eip = mtod(m, struct etherip_header *);
-		/* 
-		 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
-		 * accepts an EtherIP packet with revered version field in
-		 * the header.  This is a knob for backward compatibility
-		 * with FreeBSD 7.2R or prior.
-		 */
-		if (sc->gif_options & GIF_ACCEPT_REVETHIP) {
-			if (eip->eip_resvl != ETHERIP_VERSION
-			    && eip->eip_ver != ETHERIP_VERSION) {
-				/* discard unknown versions */
-				m_freem(m);
-				return;
-			}
-		} else {
-			if (eip->eip_ver != ETHERIP_VERSION) {
-				/* discard unknown versions */
-				m_freem(m);
-				return;
-			}
+		if (eip->eip_ver != ETHERIP_VERSION) {
+			/* discard unknown versions */
+			m_freem(m);
+			goto drop;
 		}
 		m_adj(m, sizeof(struct etherip_header));
 
@@ -583,7 +647,7 @@
 					m->m_flags |= M_BCAST;
 				else
 					m->m_flags |= M_MCAST;
-				ifp->if_imcasts++;
+				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 			}
 			BRIDGE_INPUT(ifp, m);
 
@@ -608,59 +672,61 @@
 		return;
 	}
 
-	ifp->if_ipackets++;
-	ifp->if_ibytes += m->m_pkthdr.len;
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
+	return;
+drop:
+	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 }
 
 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
 int
-gif_ioctl(ifp, cmd, data)
-	struct ifnet *ifp;
-	u_long cmd;
-	caddr_t data;
+gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-	struct gif_softc *sc  = ifp->if_softc;
-	struct ifreq     *ifr = (struct ifreq*)data;
-	int error = 0, size;
-	u_int	options;
+	GIF_RLOCK_TRACKER;
+	struct ifreq *ifr = (struct ifreq*)data;
 	struct sockaddr *dst, *src;
-#ifdef	SIOCSIFMTU /* xxx */
-	u_long mtu;
+	struct gif_softc *sc;
+#ifdef INET
+	struct sockaddr_in *sin = NULL;
 #endif
+#ifdef INET6
+	struct sockaddr_in6 *sin6 = NULL;
+#endif
+	u_int options;
+	int error;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
-		break;
-		
-	case SIOCSIFDSTADDR:
-		break;
-
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
-		break;
-
-#ifdef	SIOCSIFMTU /* xxx */
 	case SIOCGIFMTU:
-		break;
-
+	case SIOCSIFFLAGS:
+		return (0);
 	case SIOCSIFMTU:
-		mtu = ifr->ifr_mtu;
-		if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
+		if (ifr->ifr_mtu < GIF_MTU_MIN ||
+		    ifr->ifr_mtu > GIF_MTU_MAX)
 			return (EINVAL);
-		ifp->if_mtu = mtu;
-		break;
-#endif /* SIOCSIFMTU */
-
-#ifdef INET
+		else
+			ifp->if_mtu = ifr->ifr_mtu;
+		return (0);
+	}
+	sx_xlock(&gif_ioctl_sx);
+	sc = ifp->if_softc;
+	if (sc == NULL) {
+		error = ENXIO;
+		goto bad;
+	}
+	error = 0;
+	switch (cmd) {
 	case SIOCSIFPHYADDR:
-#endif
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
-#endif /* INET6 */
-	case SIOCSLIFPHYADDR:
+#endif
+		error = EINVAL;
 		switch (cmd) {
 #ifdef INET
 		case SIOCSIFPHYADDR:
@@ -678,199 +744,169 @@
 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
-		case SIOCSLIFPHYADDR:
-			src = (struct sockaddr *)
-				&(((struct if_laddrreq *)data)->addr);
-			dst = (struct sockaddr *)
-				&(((struct if_laddrreq *)data)->dstaddr);
-			break;
 		default:
-			return EINVAL;
+			goto bad;
 		}
-
 		/* sa_family must be equal */
-		if (src->sa_family != dst->sa_family)
-			return EINVAL;
+		if (src->sa_family != dst->sa_family ||
+		    src->sa_len != dst->sa_len)
+			goto bad;
 
 		/* validate sa_len */
+		/* check sa_family looks sane for the cmd */
 		switch (src->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (src->sa_len != sizeof(struct sockaddr_in))
-				return EINVAL;
+				goto bad;
+			if (cmd != SIOCSIFPHYADDR) {
+				error = EAFNOSUPPORT;
+				goto bad;
+			}
+			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+			    satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
+				error = EADDRNOTAVAIL;
+				goto bad;
+			}
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (src->sa_len != sizeof(struct sockaddr_in6))
-				return EINVAL;
+				goto bad;
+			if (cmd != SIOCSIFPHYADDR_IN6) {
+				error = EAFNOSUPPORT;
+				goto bad;
+			}
+			error = EADDRNOTAVAIL;
+			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+			    ||
+			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+				goto bad;
+			/*
+			 * Check validity of the scope zone ID of the
+			 * addresses, and convert it into the kernel
+			 * internal form if necessary.
+			 */
+			error = sa6_embedscope(satosin6(src), 0);
+			if (error != 0)
+				goto bad;
+			error = sa6_embedscope(satosin6(dst), 0);
+			if (error != 0)
+				goto bad;
 			break;
 #endif
 		default:
-			return EAFNOSUPPORT;
+			error = EAFNOSUPPORT;
+			goto bad;
 		}
-		switch (dst->sa_family) {
-#ifdef INET
-		case AF_INET:
-			if (dst->sa_len != sizeof(struct sockaddr_in))
-				return EINVAL;
-			break;
-#endif
-#ifdef INET6
-		case AF_INET6:
-			if (dst->sa_len != sizeof(struct sockaddr_in6))
-				return EINVAL;
-			break;
-#endif
-		default:
-			return EAFNOSUPPORT;
-		}
-
-		/* check sa_family looks sane for the cmd */
-		switch (cmd) {
-		case SIOCSIFPHYADDR:
-			if (src->sa_family == AF_INET)
-				break;
-			return EAFNOSUPPORT;
-#ifdef INET6
-		case SIOCSIFPHYADDR_IN6:
-			if (src->sa_family == AF_INET6)
-				break;
-			return EAFNOSUPPORT;
-#endif /* INET6 */
-		case SIOCSLIFPHYADDR:
-			/* checks done in the above */
-			break;
-		}
-
-		error = gif_set_tunnel(GIF2IFP(sc), src, dst);
+		error = gif_set_tunnel(ifp, src, dst);
 		break;
-
-#ifdef SIOCDIFPHYADDR
 	case SIOCDIFPHYADDR:
-		gif_delete_tunnel(GIF2IFP(sc));
+		gif_delete_tunnel(ifp);
 		break;
-#endif
-			
 	case SIOCGIFPSRCADDR:
+	case SIOCGIFPDSTADDR:
 #ifdef INET6
 	case SIOCGIFPSRCADDR_IN6:
-#endif /* INET6 */
-		if (sc->gif_psrc == NULL) {
+	case SIOCGIFPDSTADDR_IN6:
+#endif
+		if (sc->gif_family == 0) {
 			error = EADDRNOTAVAIL;
-			goto bad;
+			break;
 		}
-		src = sc->gif_psrc;
+		GIF_RLOCK(sc);
 		switch (cmd) {
 #ifdef INET
 		case SIOCGIFPSRCADDR:
-			dst = &ifr->ifr_addr;
-			size = sizeof(ifr->ifr_addr);
+		case SIOCGIFPDSTADDR:
+			if (sc->gif_family != AF_INET) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+			sin = (struct sockaddr_in *)&ifr->ifr_addr;
+			memset(sin, 0, sizeof(*sin));
+			sin->sin_family = AF_INET;
+			sin->sin_len = sizeof(*sin);
 			break;
-#endif /* INET */
+#endif
 #ifdef INET6
 		case SIOCGIFPSRCADDR_IN6:
-			dst = (struct sockaddr *)
+		case SIOCGIFPDSTADDR_IN6:
+			if (sc->gif_family != AF_INET6) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+			sin6 = (struct sockaddr_in6 *)
 				&(((struct in6_ifreq *)data)->ifr_addr);
-			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+			memset(sin6, 0, sizeof(*sin6));
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_len = sizeof(*sin6);
 			break;
-#endif /* INET6 */
+#endif
 		default:
-			error = EADDRNOTAVAIL;
-			goto bad;
+			error = EAFNOSUPPORT;
 		}
-		if (src->sa_len > size)
-			return EINVAL;
-		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
+		if (error == 0) {
+			switch (cmd) {
+#ifdef INET
+			case SIOCGIFPSRCADDR:
+				sin->sin_addr = sc->gif_iphdr->ip_src;
+				break;
+			case SIOCGIFPDSTADDR:
+				sin->sin_addr = sc->gif_iphdr->ip_dst;
+				break;
+#endif
 #ifdef INET6
-		if (dst->sa_family == AF_INET6) {
-			error = sa6_recoverscope((struct sockaddr_in6 *)dst);
-			if (error != 0)
-				return (error);
-		}
+			case SIOCGIFPSRCADDR_IN6:
+				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
+				break;
+			case SIOCGIFPDSTADDR_IN6:
+				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
+				break;
 #endif
-		break;
-			
-	case SIOCGIFPDSTADDR:
-#ifdef INET6
-	case SIOCGIFPDSTADDR_IN6:
-#endif /* INET6 */
-		if (sc->gif_pdst == NULL) {
-			error = EADDRNOTAVAIL;
-			goto bad;
+			}
 		}
-		src = sc->gif_pdst;
+		GIF_RUNLOCK(sc);
+		if (error != 0)
+			break;
 		switch (cmd) {
 #ifdef INET
+		case SIOCGIFPSRCADDR:
 		case SIOCGIFPDSTADDR:
-			dst = &ifr->ifr_addr;
-			size = sizeof(ifr->ifr_addr);
+			error = prison_if(curthread->td_ucred,
+			    (struct sockaddr *)sin);
+			if (error != 0)
+				memset(sin, 0, sizeof(*sin));
 			break;
-#endif /* INET */
+#endif
 #ifdef INET6
+		case SIOCGIFPSRCADDR_IN6:
 		case SIOCGIFPDSTADDR_IN6:
-			dst = (struct sockaddr *)
-				&(((struct in6_ifreq *)data)->ifr_addr);
-			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
-			break;
-#endif /* INET6 */
-		default:
-			error = EADDRNOTAVAIL;
-			goto bad;
-		}
-		if (src->sa_len > size)
-			return EINVAL;
-		error = prison_if(curthread->td_ucred, src);
-		if (error != 0)
-			return (error);
-		error = prison_if(curthread->td_ucred, dst);
-		if (error != 0)
-			return (error);
-		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-#ifdef INET6
-		if (dst->sa_family == AF_INET6) {
-			error = sa6_recoverscope((struct sockaddr_in6 *)dst);
+			error = prison_if(curthread->td_ucred,
+			    (struct sockaddr *)sin6);
+			if (error == 0)
+				error = sa6_recoverscope(sin6);
 			if (error != 0)
-				return (error);
-		}
+				memset(sin6, 0, sizeof(*sin6));
 #endif
-		break;
-
-	case SIOCGLIFPHYADDR:
-		if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
-			error = EADDRNOTAVAIL;
-			goto bad;
 		}
-
-		/* copy src */
-		src = sc->gif_psrc;
-		dst = (struct sockaddr *)
-			&(((struct if_laddrreq *)data)->addr);
-		size = sizeof(((struct if_laddrreq *)data)->addr);
-		if (src->sa_len > size)
-			return EINVAL;
-		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-
-		/* copy dst */
-		src = sc->gif_pdst;
-		dst = (struct sockaddr *)
-			&(((struct if_laddrreq *)data)->dstaddr);
-		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
-		if (src->sa_len > size)
-			return EINVAL;
-		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
 		break;
-
-	case SIOCSIFFLAGS:
-		/* if_ioctl() takes care of it */
+	case SIOCGTUNFIB:
+		ifr->ifr_fib = sc->gif_fibnum;
 		break;
-
+	case SIOCSTUNFIB:
+		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
+			break;
+		if (ifr->ifr_fib >= rt_numfibs)
+			error = EINVAL;
+		else
+			sc->gif_fibnum = ifr->ifr_fib;
+		break;
 	case GIFGOPTS:
 		options = sc->gif_options;
-		error = copyout(&options, ifr->ifr_data,
-				sizeof(options));
+		error = copyout(&options, ifr->ifr_data, sizeof(options));
 		break;
-
 	case GIFSOPTS:
 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
 			break;
@@ -882,151 +918,154 @@
 		else
 			sc->gif_options = options;
 		break;
-
 	default:
 		error = EINVAL;
 		break;
 	}
- bad:
-	return error;
+bad:
+	sx_xunlock(&gif_ioctl_sx);
+	return (error);
 }
 
-/*
- * XXXRW: There's a general event-ordering issue here: the code to check
- * if a given tunnel is already present happens before we perform a
- * potentially blocking setup of the tunnel.  This code needs to be
- * re-ordered so that the check and replacement can be atomic using
- * a mutex.
- */
-int
-gif_set_tunnel(ifp, src, dst)
-	struct ifnet *ifp;
-	struct sockaddr *src;
-	struct sockaddr *dst;
+static void
+gif_detach(struct gif_softc *sc)
 {
-	struct gif_softc *sc = ifp->if_softc;
-	struct gif_softc *sc2;
-	struct sockaddr *osrc, *odst, *sa;
-	int error = 0; 
 
-	mtx_lock(&gif_mtx);
-	LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
-		if (sc2 == sc)
-			continue;
-		if (!sc2->gif_pdst || !sc2->gif_psrc)
-			continue;
-		if (sc2->gif_pdst->sa_family != dst->sa_family ||
-		    sc2->gif_pdst->sa_len != dst->sa_len ||
-		    sc2->gif_psrc->sa_family != src->sa_family ||
-		    sc2->gif_psrc->sa_len != src->sa_len)
-			continue;
+	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+	if (sc->gif_ecookie != NULL)
+		encap_detach(sc->gif_ecookie);
+	sc->gif_ecookie = NULL;
+}
 
-		/*
-		 * Disallow parallel tunnels unless instructed
-		 * otherwise.
-		 */
-		if (!V_parallel_tunnels &&
-		    bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
-		    bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
-			error = EADDRNOTAVAIL;
-			mtx_unlock(&gif_mtx);
-			goto bad;
-		}
+static int
+gif_attach(struct gif_softc *sc, int af)
+{
 
-		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
+	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		return (in_gif_attach(sc));
+#endif
+#ifdef INET6
+	case AF_INET6:
+		return (in6_gif_attach(sc));
+#endif
 	}
-	mtx_unlock(&gif_mtx);
+	return (EAFNOSUPPORT);
+}
 
-	/* XXX we can detach from both, but be polite just in case */
-	if (sc->gif_psrc)
-		switch (sc->gif_psrc->sa_family) {
+static int
+gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
+{
+	struct gif_softc *sc = ifp->if_softc;
+	struct gif_softc *tsc;
 #ifdef INET
-		case AF_INET:
-			(void)in_gif_detach(sc);
-			break;
+	struct ip *ip;
 #endif
 #ifdef INET6
-		case AF_INET6:
-			(void)in6_gif_detach(sc);
-			break;
+	struct ip6_hdr *ip6;
 #endif
-		}
+	void *hdr;
+	int error = 0;
 
-	osrc = sc->gif_psrc;
-	sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
-	bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
-	sc->gif_psrc = sa;
-
-	odst = sc->gif_pdst;
-	sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
-	bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
-	sc->gif_pdst = sa;
-
-	switch (sc->gif_psrc->sa_family) {
+	if (sc == NULL)
+		return (ENXIO);
+	/* Disallow parallel tunnels unless instructed otherwise. */
+	if (V_parallel_tunnels == 0) {
+		GIF_LIST_LOCK();
+		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
+			if (tsc == sc || tsc->gif_family != src->sa_family)
+				continue;
 #ifdef INET
+			if (tsc->gif_family == AF_INET &&
+			    tsc->gif_iphdr->ip_src.s_addr ==
+			    satosin(src)->sin_addr.s_addr &&
+			    tsc->gif_iphdr->ip_dst.s_addr ==
+			    satosin(dst)->sin_addr.s_addr) {
+				error = EADDRNOTAVAIL;
+				GIF_LIST_UNLOCK();
+				goto bad;
+			}
+#endif
+#ifdef INET6
+			if (tsc->gif_family == AF_INET6 &&
+			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
+			    &satosin6(src)->sin6_addr) &&
+			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
+			    &satosin6(dst)->sin6_addr)) {
+				error = EADDRNOTAVAIL;
+				GIF_LIST_UNLOCK();
+				goto bad;
+			}
+#endif
+		}
+		GIF_LIST_UNLOCK();
+	}
+	switch (src->sa_family) {
+#ifdef INET
 	case AF_INET:
-		error = in_gif_attach(sc);
+		hdr = ip = malloc(sizeof(struct ip), M_GIF,
+		    M_WAITOK | M_ZERO);
+		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
+		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
-		/*
-		 * Check validity of the scope zone ID of the addresses, and
-		 * convert it into the kernel internal form if necessary.
-		 */
-		error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
-		if (error != 0)
-			break;
-		error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
-		if (error != 0)
-			break;
-		error = in6_gif_attach(sc);
+		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
+		    M_WAITOK | M_ZERO);
+		ip6->ip6_src = satosin6(src)->sin6_addr;
+		ip6->ip6_dst = satosin6(dst)->sin6_addr;
+		ip6->ip6_vfc = IPV6_VERSION;
 		break;
 #endif
-	}
-	if (error) {
-		/* rollback */
-		free((caddr_t)sc->gif_psrc, M_IFADDR);
-		free((caddr_t)sc->gif_pdst, M_IFADDR);
-		sc->gif_psrc = osrc;
-		sc->gif_pdst = odst;
-		goto bad;
-	}
+	default:
+		return (EAFNOSUPPORT);
+	};
 
-	if (osrc)
-		free((caddr_t)osrc, M_IFADDR);
-	if (odst)
-		free((caddr_t)odst, M_IFADDR);
+	if (sc->gif_family != src->sa_family)
+		gif_detach(sc);
+	if (sc->gif_family == 0 ||
+	    sc->gif_family != src->sa_family)
+		error = gif_attach(sc, src->sa_family);
 
- bad:
-	if (sc->gif_psrc && sc->gif_pdst)
+	GIF_WLOCK(sc);
+	if (sc->gif_family != 0)
+		free(sc->gif_hdr, M_GIF);
+	sc->gif_family = src->sa_family;
+	sc->gif_hdr = hdr;
+	GIF_WUNLOCK(sc);
+#if defined(INET) || defined(INET6)
+bad:
+#endif
+	if (error == 0 && sc->gif_family != 0) {
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-	else
+		if_link_state_change(ifp, LINK_STATE_UP);
+	} else {
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-
-	return error;
+		if_link_state_change(ifp, LINK_STATE_DOWN);
+	}
+	return (error);
 }
 
-void
-gif_delete_tunnel(ifp)
-	struct ifnet *ifp;
+static void
+gif_delete_tunnel(struct ifnet *ifp)
 {
 	struct gif_softc *sc = ifp->if_softc;
+	int family;
 
-	if (sc->gif_psrc) {
-		free((caddr_t)sc->gif_psrc, M_IFADDR);
-		sc->gif_psrc = NULL;
+	if (sc == NULL)
+		return;
+
+	GIF_WLOCK(sc);
+	family = sc->gif_family;
+	sc->gif_family = 0;
+	GIF_WUNLOCK(sc);
+	if (family != 0) {
+		gif_detach(sc);
+		free(sc->gif_hdr, M_GIF);
 	}
-	if (sc->gif_pdst) {
-		free((caddr_t)sc->gif_pdst, M_IFADDR);
-		sc->gif_pdst = NULL;
-	}
-	/* it is safe to detach from both */
-#ifdef INET
-	(void)in_gif_detach(sc);
-#endif
-#ifdef INET6
-	(void)in6_gif_detach(sc);
-#endif
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+	if_link_state_change(ifp, LINK_STATE_DOWN);
 }

Modified: trunk/sys/net/if_gif.h
===================================================================
--- trunk/sys/net/if_gif.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gif.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/net/if_gif.h 207369 2010-04-29 11:52:42Z bz $	*/
+/*	$FreeBSD: stable/10/sys/net/if_gif.h 287730 2015-09-13 01:35:40Z hrs $	*/
 /*	$KAME: if_gif.h,v 1.17 2000/09/11 11:36:41 sumikawa Exp $	*/
 
 /*-
@@ -31,21 +31,17 @@
  * SUCH DAMAGE.
  */
 
-/*
- * if_gif.h
- */
-
 #ifndef _NET_IF_GIF_H_
 #define _NET_IF_GIF_H_
 
-
 #ifdef _KERNEL
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <netinet/in.h>
-/* xxx sigh, why route have struct route instead of pointer? */
 
+struct ip;
+struct ip6_hdr;
 struct encaptab;
 
 extern	void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
@@ -57,35 +53,38 @@
 extern	void (*ng_gif_detach_p)(struct ifnet *ifp);
 
 struct gif_softc {
-	struct ifnet	*gif_ifp;
-	struct mtx	gif_mtx;
-	struct sockaddr	*gif_psrc; /* Physical src addr */
-	struct sockaddr	*gif_pdst; /* Physical dst addr */
+	struct ifnet		*gif_ifp;
+	struct rmlock		gif_lock;
+	const struct encaptab	*gif_ecookie;
+	int			gif_family;
+	int			gif_flags;
+	u_int			gif_fibnum;
+	u_int			gif_options;
+	void			*gif_netgraph;	/* netgraph node info */
 	union {
-		struct route  gifscr_ro;    /* xxx */
+		void		*hdr;
+		struct ip	*iphdr;
 #ifdef INET6
-		struct route_in6 gifscr_ro6; /* xxx */
+		struct ip6_hdr	*ip6hdr;
 #endif
-	} gifsc_gifscr;
-	int		gif_flags;
-	u_int		gif_fibnum;
-	const struct encaptab *encap_cookie4;
-	const struct encaptab *encap_cookie6;
-	void		*gif_netgraph;	/* ng_gif(4) netgraph node info */
-	u_int		gif_options;
-	LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
+	} gif_uhdr;
+	LIST_ENTRY(gif_softc)	gif_list; /* all gif's are linked */
 };
 #define	GIF2IFP(sc)	((sc)->gif_ifp)
-#define	GIF_LOCK_INIT(sc)	mtx_init(&(sc)->gif_mtx, "gif softc",	\
-				     NULL, MTX_DEF)
-#define	GIF_LOCK_DESTROY(sc)	mtx_destroy(&(sc)->gif_mtx)
-#define	GIF_LOCK(sc)		mtx_lock(&(sc)->gif_mtx)
-#define	GIF_UNLOCK(sc)		mtx_unlock(&(sc)->gif_mtx)
-#define	GIF_LOCK_ASSERT(sc)	mtx_assert(&(sc)->gif_mtx, MA_OWNED)
+#define	GIF_LOCK_INIT(sc)	rm_init(&(sc)->gif_lock, "gif softc")
+#define	GIF_LOCK_DESTROY(sc)	rm_destroy(&(sc)->gif_lock)
+#define	GIF_RLOCK_TRACKER	struct rm_priotracker gif_tracker
+#define	GIF_RLOCK(sc)		rm_rlock(&(sc)->gif_lock, &gif_tracker)
+#define	GIF_RUNLOCK(sc)		rm_runlock(&(sc)->gif_lock, &gif_tracker)
+#define	GIF_RLOCK_ASSERT(sc)	rm_assert(&(sc)->gif_lock, RA_RLOCKED)
+#define	GIF_WLOCK(sc)		rm_wlock(&(sc)->gif_lock)
+#define	GIF_WUNLOCK(sc)		rm_wunlock(&(sc)->gif_lock)
+#define	GIF_WLOCK_ASSERT(sc)	rm_assert(&(sc)->gif_lock, RA_WLOCKED)
 
-#define gif_ro gifsc_gifscr.gifscr_ro
+#define	gif_iphdr	gif_uhdr.iphdr
+#define	gif_hdr		gif_uhdr.hdr
 #ifdef INET6
-#define gif_ro6 gifsc_gifscr.gifscr_ro6
+#define	gif_ip6hdr	gif_uhdr.ip6hdr
 #endif
 
 #define GIF_MTU		(1280)	/* Default MTU */
@@ -92,9 +91,6 @@
 #define	GIF_MTU_MIN	(1280)	/* Minimum MTU */
 #define	GIF_MTU_MAX	(8192)	/* Maximum MTU */
 
-#define	MTAG_GIF	1080679712
-#define	MTAG_GIF_CALLED	0
-
 struct etherip_header {
 #if BYTE_ORDER == LITTLE_ENDIAN
 	u_int	eip_resvl:4,	/* reserved */
@@ -112,20 +108,26 @@
 #define	ETHERIP_ALIGN		2
 
 /* Prototypes */
-void gif_input(struct mbuf *, int, struct ifnet *);
-int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
+int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
 	       struct route *);
-int gif_ioctl(struct ifnet *, u_long, caddr_t);
-int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *);
-void gif_delete_tunnel(struct ifnet *);
 int gif_encapcheck(const struct mbuf *, int, int, void *);
+#ifdef INET
+int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in_gif_attach(struct gif_softc *);
+#endif
+#ifdef INET6
+int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in6_gif_attach(struct gif_softc *);
+#endif
 #endif /* _KERNEL */
 
 #define GIFGOPTS	_IOWR('i', 150, struct ifreq)
 #define GIFSOPTS	_IOW('i', 151, struct ifreq)
 
-#define	GIF_ACCEPT_REVETHIP	0x0001
-#define	GIF_SEND_REVETHIP	0x0010
-#define	GIF_OPTMASK		(GIF_ACCEPT_REVETHIP|GIF_SEND_REVETHIP)
+#define	GIF_IGNORE_SOURCE	0x0002
+#define	GIF_OPTMASK		(GIF_IGNORE_SOURCE)
 
 #endif /* _NET_IF_GIF_H_ */

Modified: trunk/sys/net/if_gre.c
===================================================================
--- trunk/sys/net/if_gre.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gre.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,9 +1,7 @@
 /* $MidnightBSD$ */
-/*	$NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
-/*	 $FreeBSD: stable/9/sys/net/if_gre.c 248085 2013-03-09 02:36:32Z marius $ */
-
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -31,18 +29,13 @@
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
  */
 
-/*
- * Encapsulate L3 protocols into IP
- * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
- * If_gre is compatible with Cisco GRE tunnels, so you can
- * have a NetBSD box as the other end of a tunnel interface of a Cisco
- * router. See gre(4) for more details.
- * Also supported:  IP in IP encaps (proto 55) as of RFC 2004
- */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_gre.c 293410 2016-01-08 02:58:10Z araujo $");
 
-#include "opt_atalk.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
@@ -49,6 +42,7 @@
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
@@ -56,98 +50,77 @@
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
+#include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
+#include <sys/sx.h>
 #include <sys/sysctl.h>
+#include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_clone.h>
+#include <net/if_var.h>
 #include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/vnet.h>
 #include <net/route.h>
-#include <net/vnet.h>
 
+#include <netinet/in.h>
 #ifdef INET
-#include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
-#include <netinet/ip_gre.h>
 #include <netinet/ip_var.h>
-#include <netinet/ip_encap.h>
-#else
-#error "Huh? if_gre without inet?"
 #endif
 
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#endif
+
+#include <netinet/ip_encap.h>
 #include <net/bpf.h>
-
 #include <net/if_gre.h>
 
-/*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
- */
-#define GREMTU	1476
+#include <machine/in_cksum.h>
 
-#define GRENAME	"gre"
+#include <security/mac/mac_framework.h>
+#define	GREMTU			1500
+static const char grename[] = "gre";
+static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
+static VNET_DEFINE(struct mtx, gre_mtx);
+#define	V_gre_mtx	VNET(gre_mtx)
+#define	GRE_LIST_LOCK_INIT(x)		mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
+					    MTX_DEF)
+#define	GRE_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gre_mtx)
+#define	GRE_LIST_LOCK(x)		mtx_lock(&V_gre_mtx)
+#define	GRE_LIST_UNLOCK(x)		mtx_unlock(&V_gre_mtx)
 
-#define	MTAG_COOKIE_GRE		1307983903
-#define	MTAG_GRE_NESTING	1
-struct mtag_gre_nesting {
-	uint16_t	count;
-	uint16_t	max;
-	struct ifnet	*ifp[];
-};
+static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
+#define	V_gre_softc_list	VNET(gre_softc_list)
+static struct sx gre_ioctl_sx;
+SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
 
-/*
- * gre_mtx protects all global variables in if_gre.c.
- * XXX: gre_softc data not protected yet.
- */
-struct mtx gre_mtx;
-static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation");
-
-struct gre_softc_head gre_softc_list;
-
 static int	gre_clone_create(struct if_clone *, int, caddr_t);
 static void	gre_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, gre_cloner);
+#define	V_gre_cloner	VNET(gre_cloner)
+
+static void	gre_qflush(struct ifnet *);
+static int	gre_transmit(struct ifnet *, struct mbuf *);
 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
-static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
-		    struct route *ro);
+static int	gre_output(struct ifnet *, struct mbuf *,
+		    const struct sockaddr *, struct route *);
 
-IFC_SIMPLE_DECLARE(gre, 0);
+static void	gre_updatehdr(struct gre_softc *);
+static int	gre_set_tunnel(struct ifnet *, struct sockaddr *,
+    struct sockaddr *);
+static void	gre_delete_tunnel(struct ifnet *);
 
-static int gre_compute_route(struct gre_softc *sc);
-
-static void	greattach(void);
-
-#ifdef INET
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		IPPROTO_GRE,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		gre_input,
-	.pr_output =		(pr_output_t *)rip_output,
-	.pr_ctlinput =		rip_ctlinput,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
-static const struct protosw in_mobile_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		IPPROTO_MOBILE,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		gre_mobile_input,
-	.pr_output =		(pr_output_t *)rip_output,
-	.pr_ctlinput =		rip_ctlinput,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
-#endif
-
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
     "Generic Routing Encapsulation");
@@ -162,795 +135,846 @@
  */
 #define MAX_GRE_NEST 1
 #endif
-static int max_gre_nesting = MAX_GRE_NEST;
-SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
-    &max_gre_nesting, 0, "Max nested tunnels");
 
-/* ARGSUSED */
+static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
+#define	V_max_gre_nesting	VNET(max_gre_nesting)
+SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+    &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
+
 static void
-greattach(void)
+vnet_gre_init(const void *unused __unused)
 {
+	LIST_INIT(&V_gre_softc_list);
+	GRE_LIST_LOCK_INIT();
+	V_gre_cloner = if_clone_simple(grename, gre_clone_create,
+	    gre_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_gre_init, NULL);
 
-	mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
-	LIST_INIT(&gre_softc_list);
-	if_clone_attach(&gre_cloner);
+static void
+vnet_gre_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_gre_cloner);
+	GRE_LIST_LOCK_DESTROY();
 }
+VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_gre_uninit, NULL);
 
 static int
-gre_clone_create(ifc, unit, params)
-	struct if_clone *ifc;
-	int unit;
-	caddr_t params;
+gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct gre_softc *sc;
 
 	sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
-
+	sc->gre_fibnum = curthread->td_proc->p_fibnum;
 	GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
-	if (GRE2IFP(sc) == NULL) {
-		free(sc, M_GRE);
-		return (ENOSPC);
-	}
-
+	GRE_LOCK_INIT(sc);
 	GRE2IFP(sc)->if_softc = sc;
-	if_initname(GRE2IFP(sc), ifc->ifc_name, unit);
+	if_initname(GRE2IFP(sc), grename, unit);
 
-	GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
-	GRE2IFP(sc)->if_addrlen = 0;
-	GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
-	GRE2IFP(sc)->if_mtu = GREMTU;
+	GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU;
 	GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
 	GRE2IFP(sc)->if_output = gre_output;
 	GRE2IFP(sc)->if_ioctl = gre_ioctl;
-	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
-	sc->g_proto = IPPROTO_GRE;
-	GRE2IFP(sc)->if_flags |= IFF_LINK0;
-	sc->encap = NULL;
-	sc->gre_fibnum = curthread->td_proc->p_fibnum;
-	sc->wccp_ver = WCCP_V1;
-	sc->key = 0;
+	GRE2IFP(sc)->if_transmit = gre_transmit;
+	GRE2IFP(sc)->if_qflush = gre_qflush;
+	GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+	GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
 	if_attach(GRE2IFP(sc));
 	bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
-	mtx_lock(&gre_mtx);
-	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
-	mtx_unlock(&gre_mtx);
+	GRE_LIST_LOCK();
+	LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
+	GRE_LIST_UNLOCK();
 	return (0);
 }
 
 static void
-gre_clone_destroy(ifp)
-	struct ifnet *ifp;
+gre_clone_destroy(struct ifnet *ifp)
 {
-	struct gre_softc *sc = ifp->if_softc;
+	struct gre_softc *sc;
 
-	mtx_lock(&gre_mtx);
-	LIST_REMOVE(sc, sc_list);
-	mtx_unlock(&gre_mtx);
-
-#ifdef INET
-	if (sc->encap != NULL)
-		encap_detach(sc->encap);
-#endif
+	sx_xlock(&gre_ioctl_sx);
+	sc = ifp->if_softc;
+	gre_delete_tunnel(ifp);
+	GRE_LIST_LOCK();
+	LIST_REMOVE(sc, gre_list);
+	GRE_LIST_UNLOCK();
 	bpfdetach(ifp);
 	if_detach(ifp);
+	ifp->if_softc = NULL;
+	sx_xunlock(&gre_ioctl_sx);
+
 	if_free(ifp);
+	GRE_LOCK_DESTROY(sc);
 	free(sc, M_GRE);
 }
 
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004
- */
 static int
-gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
-	   struct route *ro)
+gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-	int error = 0;
-	struct gre_softc *sc = ifp->if_softc;
-	struct greip *gh;
-	struct ip *ip;
-	struct m_tag *mtag;
-	struct mtag_gre_nesting *gt;
-	size_t len;
-	u_short gre_ip_id = 0;
-	uint8_t gre_ip_tos = 0;
-	u_int16_t etype = 0;
-	struct mobile_h mob_h;
-	u_int32_t af;
-	int extra = 0, max;
+	GRE_RLOCK_TRACKER;
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct sockaddr *src, *dst;
+	struct gre_softc *sc;
+#ifdef INET
+	struct sockaddr_in *sin = NULL;
+#endif
+#ifdef INET6
+	struct sockaddr_in6 *sin6 = NULL;
+#endif
+	uint32_t opt;
+	int error;
 
-	/*
-	 * gre may cause infinite recursion calls when misconfigured.  High
-	 * nesting level may cause stack exhaustion.  We'll prevent this by
-	 * detecting loops and by introducing upper limit.
-	 */
-	mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
-	if (mtag != NULL) {
-		struct ifnet **ifp2;
-
-		gt = (struct mtag_gre_nesting *)(mtag + 1);
-		gt->count++;
-		if (gt->count > min(gt->max,max_gre_nesting)) {
-			printf("%s: hit maximum recursion limit %u on %s\n",
-				__func__, gt->count - 1, ifp->if_xname);
-			m_freem(m);
-			error = EIO;	/* is there better errno? */
-			goto end;
-		}
-
-		ifp2 = gt->ifp;
-		for (max = gt->count - 1; max > 0; max--) {
-			if (*ifp2 == ifp)
-				break;
-			ifp2++;
-		}
-		if (*ifp2 == ifp) {
-			printf("%s: detected loop with nexting %u on %s\n",
-				__func__, gt->count-1, ifp->if_xname);
-			m_freem(m);
-			error = EIO;	/* is there better errno? */
-			goto end;
-		}
-		*ifp2 = ifp;
-
-	} else {
-		/*
-		 * Given that people should NOT increase max_gre_nesting beyond
-		 * their real needs, we allocate once per packet rather than
-		 * allocating an mtag once per passing through gre.
-		 *
-		 * Note: the sysctl does not actually check for saneness, so we
-		 * limit the maximum numbers of possible recursions here.
-		 */
-		max = imin(max_gre_nesting, 256);
-		/* If someone sets the sysctl <= 0, we want at least 1. */
-		max = imax(max, 1);
-		len = sizeof(struct mtag_gre_nesting) +
-		    max * sizeof(struct ifnet *);
-		mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
-		    M_NOWAIT);
-		if (mtag == NULL) {
-			m_freem(m);
-			error = ENOMEM;
-			goto end;
-		}
-		gt = (struct mtag_gre_nesting *)(mtag + 1);
-		bzero(gt, len);
-		gt->count = 1;
-		gt->max = max;
-		*gt->ifp = ifp;
-		m_tag_prepend(m, mtag);
+	switch (cmd) {
+	case SIOCSIFMTU:
+		 /* XXX: */
+		if (ifr->ifr_mtu < 576)
+			return (EINVAL);
+		break;
+	case SIOCSIFADDR:
+		ifp->if_flags |= IFF_UP;
+	case SIOCSIFFLAGS:
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		return (0);
+	case GRESADDRS:
+	case GRESADDRD:
+	case GREGADDRS:
+	case GREGADDRD:
+	case GRESPROTO:
+	case GREGPROTO:
+		return (EOPNOTSUPP);
 	}
-
-	if (!((ifp->if_flags & IFF_UP) &&
-	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
-	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
-		m_freem(m);
-		error = ENETDOWN;
+	src = dst = NULL;
+	sx_xlock(&gre_ioctl_sx);
+	sc = ifp->if_softc;
+	if (sc == NULL) {
+		error = ENXIO;
 		goto end;
 	}
-
-	gh = NULL;
-	ip = NULL;
-
-	/* BPF writes need to be handled specially. */
-	if (dst->sa_family == AF_UNSPEC) {
-		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af;
-	}
-
-	if (bpf_peers_present(ifp->if_bpf)) {
-		af = dst->sa_family;
-		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
-	}
-
-	m->m_flags &= ~(M_BCAST|M_MCAST);
-
-	if (sc->g_proto == IPPROTO_MOBILE) {
-		if (dst->sa_family == AF_INET) {
-			struct mbuf *m0;
-			int msiz;
-
-			ip = mtod(m, struct ip *);
-
-			/*
-			 * RFC2004 specifies that fragmented diagrams shouldn't
-			 * be encapsulated.
-			 */
-			if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
-				_IF_DROP(&ifp->if_snd);
-				m_freem(m);
-				error = EINVAL;    /* is there better errno? */
-				goto end;
-			}
-			memset(&mob_h, 0, MOB_H_SIZ_L);
-			mob_h.proto = (ip->ip_p) << 8;
-			mob_h.odst = ip->ip_dst.s_addr;
-			ip->ip_dst.s_addr = sc->g_dst.s_addr;
-
-			/*
-			 * If the packet comes from our host, we only change
-			 * the destination address in the IP header.
-			 * Else we also need to save and change the source
-			 */
-			if (in_hosteq(ip->ip_src, sc->g_src)) {
-				msiz = MOB_H_SIZ_S;
-			} else {
-				mob_h.proto |= MOB_H_SBIT;
-				mob_h.osrc = ip->ip_src.s_addr;
-				ip->ip_src.s_addr = sc->g_src.s_addr;
-				msiz = MOB_H_SIZ_L;
-			}
-			mob_h.proto = htons(mob_h.proto);
-			mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
-
-			if ((m->m_data - msiz) < m->m_pktdat) {
-				/* need new mbuf */
-				MGETHDR(m0, M_DONTWAIT, MT_DATA);
-				if (m0 == NULL) {
-					_IF_DROP(&ifp->if_snd);
-					m_freem(m);
-					error = ENOBUFS;
-					goto end;
-				}
-				m0->m_next = m;
-				m->m_data += sizeof(struct ip);
-				m->m_len -= sizeof(struct ip);
-				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
-				m0->m_len = msiz + sizeof(struct ip);
-				m0->m_data += max_linkhdr;
-				memcpy(mtod(m0, caddr_t), (caddr_t)ip,
-				       sizeof(struct ip));
-				m = m0;
-			} else {  /* we have some space left in the old one */
-				m->m_data -= msiz;
-				m->m_len += msiz;
-				m->m_pkthdr.len += msiz;
-				bcopy(ip, mtod(m, caddr_t),
-					sizeof(struct ip));
-			}
-			ip = mtod(m, struct ip *);
-			memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
-			ip->ip_len = ntohs(ip->ip_len) + msiz;
-		} else {  /* AF_INET */
-			_IF_DROP(&ifp->if_snd);
-			m_freem(m);
-			error = EINVAL;
-			goto end;
-		}
-	} else if (sc->g_proto == IPPROTO_GRE) {
-		switch (dst->sa_family) {
-		case AF_INET:
-			ip = mtod(m, struct ip *);
-			gre_ip_tos = ip->ip_tos;
-			gre_ip_id = ip->ip_id;
-			if (sc->wccp_ver == WCCP_V2) {
-				extra = sizeof(uint32_t);
-				etype =  WCCP_PROTOCOL_TYPE;
-			} else {
-				etype = ETHERTYPE_IP;
-			}
-			break;
+	error = 0;
+	switch (cmd) {
+	case SIOCSIFMTU:
+		GRE_WLOCK(sc);
+		sc->gre_mtu = ifr->ifr_mtu;
+		gre_updatehdr(sc);
+		GRE_WUNLOCK(sc);
+		goto end;
+	case SIOCSIFPHYADDR:
 #ifdef INET6
-		case AF_INET6:
-			gre_ip_id = ip_newid();
-			etype = ETHERTYPE_IPV6;
+	case SIOCSIFPHYADDR_IN6:
+#endif
+		error = EINVAL;
+		switch (cmd) {
+#ifdef INET
+		case SIOCSIFPHYADDR:
+			src = (struct sockaddr *)
+				&(((struct in_aliasreq *)data)->ifra_addr);
+			dst = (struct sockaddr *)
+				&(((struct in_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
-#ifdef NETATALK
-		case AF_APPLETALK:
-			etype = ETHERTYPE_ATALK;
+#ifdef INET6
+		case SIOCSIFPHYADDR_IN6:
+			src = (struct sockaddr *)
+				&(((struct in6_aliasreq *)data)->ifra_addr);
+			dst = (struct sockaddr *)
+				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
 		default:
-			_IF_DROP(&ifp->if_snd);
-			m_freem(m);
 			error = EAFNOSUPPORT;
 			goto end;
 		}
-			
-		/* Reserve space for GRE header + optional GRE key */
-		int hdrlen = sizeof(struct greip) + extra;
-		if (sc->key)
-			hdrlen += sizeof(uint32_t);
-		M_PREPEND(m, hdrlen, M_DONTWAIT);
-	} else {
-		_IF_DROP(&ifp->if_snd);
-		m_freem(m);
-		error = EINVAL;
-		goto end;
-	}
+		/* sa_family must be equal */
+		if (src->sa_family != dst->sa_family ||
+		    src->sa_len != dst->sa_len)
+			goto end;
 
-	if (m == NULL) {	/* mbuf allocation failed */
-		_IF_DROP(&ifp->if_snd);
-		error = ENOBUFS;
-		goto end;
-	}
-
-	M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
-
-	gh = mtod(m, struct greip *);
-	if (sc->g_proto == IPPROTO_GRE) {
-		uint32_t *options = gh->gi_options;
-
-		memset((void *)gh, 0, sizeof(struct greip) + extra);
-		gh->gi_ptype = htons(etype);
-		gh->gi_flags = 0;
-
-		/* Add key option */
-		if (sc->key)
-		{
-			gh->gi_flags |= htons(GRE_KP);
-			*(options++) = htonl(sc->key);
-		}
-	}
-
-	gh->gi_pr = sc->g_proto;
-	if (sc->g_proto != IPPROTO_MOBILE) {
-		gh->gi_src = sc->g_src;
-		gh->gi_dst = sc->g_dst;
-		((struct ip*)gh)->ip_v = IPPROTO_IPV4;
-		((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
-		((struct ip*)gh)->ip_ttl = GRE_TTL;
-		((struct ip*)gh)->ip_tos = gre_ip_tos;
-		((struct ip*)gh)->ip_id = gre_ip_id;
-		gh->gi_len = m->m_pkthdr.len;
-	}
-
-	ifp->if_opackets++;
-	ifp->if_obytes += m->m_pkthdr.len;
-	/*
-	 * Send it off and with IP_FORWARD flag to prevent it from
-	 * overwriting the ip_id again.  ip_id is already set to the
-	 * ip_id of the encapsulated packet.
-	 */
-	error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
-	    (struct ip_moptions *)NULL, (struct inpcb *)NULL);
-  end:
-	if (error)
-		ifp->if_oerrors++;
-	return (error);
-}
-
-static int
-gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
-	struct ifreq *ifr = (struct ifreq *)data;
-	struct if_laddrreq *lifr = (struct if_laddrreq *)data;
-	struct in_aliasreq *aifr = (struct in_aliasreq *)data;
-	struct gre_softc *sc = ifp->if_softc;
-	int s;
-	struct sockaddr_in si;
-	struct sockaddr *sa = NULL;
-	int error, adj;
-	struct sockaddr_in sp, sm, dp, dm;
-	uint32_t key;
-
-	error = 0;
-	adj = 0;
-
-	s = splnet();
-	switch (cmd) {
-	case SIOCSIFADDR:
-		ifp->if_flags |= IFF_UP;
-		break;
-	case SIOCSIFDSTADDR:
-		break;
-	case SIOCSIFFLAGS:
-		/*
-		 * XXXRW: Isn't this priv_check() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
-			break;
-		if ((ifr->ifr_flags & IFF_LINK0) != 0)
-			sc->g_proto = IPPROTO_GRE;
-		else
-			sc->g_proto = IPPROTO_MOBILE;
-		if ((ifr->ifr_flags & IFF_LINK2) != 0)
-			sc->wccp_ver = WCCP_V2;
-		else
-			sc->wccp_ver = WCCP_V1;
-		goto recompute;
-	case SIOCSIFMTU:
-		/*
-		 * XXXRW: Isn't this priv_check() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
-			break;
-		if (ifr->ifr_mtu < 576) {
-			error = EINVAL;
-			break;
-		}
-		ifp->if_mtu = ifr->ifr_mtu;
-		break;
-	case SIOCGIFMTU:
-		ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
-		break;
-	case SIOCADDMULTI:
-		/*
-		 * XXXRW: Isn't this priv_checkr() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
-			break;
-		if (ifr == 0) {
-			error = EAFNOSUPPORT;
-			break;
-		}
-		switch (ifr->ifr_addr.sa_family) {
+		/* validate sa_len */
+		switch (src->sa_family) {
 #ifdef INET
 		case AF_INET:
+			if (src->sa_len != sizeof(struct sockaddr_in))
+				goto end;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
+			if (src->sa_len != sizeof(struct sockaddr_in6))
+				goto end;
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
-			break;
+			goto end;
 		}
-		break;
-	case SIOCDELMULTI:
-		/*
-		 * XXXRW: Isn't this priv_check() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
-			break;
-		if (ifr == 0) {
-			error = EAFNOSUPPORT;
-			break;
+		/* check sa_family looks sane for the cmd */
+		error = EAFNOSUPPORT;
+		switch (cmd) {
+#ifdef INET
+		case SIOCSIFPHYADDR:
+			if (src->sa_family == AF_INET)
+				break;
+			goto end;
+#endif
+#ifdef INET6
+		case SIOCSIFPHYADDR_IN6:
+			if (src->sa_family == AF_INET6)
+				break;
+			goto end;
+#endif
 		}
-		switch (ifr->ifr_addr.sa_family) {
+		error = EADDRNOTAVAIL;
+		switch (src->sa_family) {
 #ifdef INET
 		case AF_INET:
+			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
+				goto end;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
-			break;
+			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+			    ||
+			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+				goto end;
+			/*
+			 * Check validity of the scope zone ID of the
+			 * addresses, and convert it into the kernel
+			 * internal form if necessary.
+			 */
+			error = sa6_embedscope(satosin6(src), 0);
+			if (error != 0)
+				goto end;
+			error = sa6_embedscope(satosin6(dst), 0);
+			if (error != 0)
+				goto end;
 #endif
-		default:
-			error = EAFNOSUPPORT;
+		};
+		error = gre_set_tunnel(ifp, src, dst);
+		break;
+	case SIOCDIFPHYADDR:
+		gre_delete_tunnel(ifp);
+		break;
+	case SIOCGIFPSRCADDR:
+	case SIOCGIFPDSTADDR:
+#ifdef INET6
+	case SIOCGIFPSRCADDR_IN6:
+	case SIOCGIFPDSTADDR_IN6:
+#endif
+		if (sc->gre_family == 0) {
+			error = EADDRNOTAVAIL;
 			break;
 		}
-		break;
-	case GRESPROTO:
-		/*
-		 * XXXRW: Isn't this priv_check() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+		GRE_RLOCK(sc);
+		switch (cmd) {
+#ifdef INET
+		case SIOCGIFPSRCADDR:
+		case SIOCGIFPDSTADDR:
+			if (sc->gre_family != AF_INET) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+			sin = (struct sockaddr_in *)&ifr->ifr_addr;
+			memset(sin, 0, sizeof(*sin));
+			sin->sin_family = AF_INET;
+			sin->sin_len = sizeof(*sin);
 			break;
-		sc->g_proto = ifr->ifr_flags;
-		switch (sc->g_proto) {
-		case IPPROTO_GRE:
-			ifp->if_flags |= IFF_LINK0;
+#endif
+#ifdef INET6
+		case SIOCGIFPSRCADDR_IN6:
+		case SIOCGIFPDSTADDR_IN6:
+			if (sc->gre_family != AF_INET6) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+			sin6 = (struct sockaddr_in6 *)
+				&(((struct in6_ifreq *)data)->ifr_addr);
+			memset(sin6, 0, sizeof(*sin6));
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_len = sizeof(*sin6);
 			break;
-		case IPPROTO_MOBILE:
-			ifp->if_flags &= ~IFF_LINK0;
-			break;
-		default:
-			error = EPROTONOSUPPORT;
-			break;
+#endif
 		}
-		goto recompute;
-	case GREGPROTO:
-		ifr->ifr_flags = sc->g_proto;
-		break;
-	case GRESADDRS:
-	case GRESADDRD:
-		error = priv_check(curthread, PRIV_NET_GRE);
-		if (error)
-			return (error);
-		/*
-		 * set tunnel endpoints, compute a less specific route
-		 * to the remote end and mark if as up
-		 */
-		sa = &ifr->ifr_addr;
-		if (cmd == GRESADDRS)
-			sc->g_src = (satosin(sa))->sin_addr;
-		if (cmd == GRESADDRD)
-			sc->g_dst = (satosin(sa))->sin_addr;
-	recompute:
+		if (error == 0) {
+			switch (cmd) {
 #ifdef INET
-		if (sc->encap != NULL) {
-			encap_detach(sc->encap);
-			sc->encap = NULL;
-		}
+			case SIOCGIFPSRCADDR:
+				sin->sin_addr = sc->gre_oip.ip_src;
+				break;
+			case SIOCGIFPDSTADDR:
+				sin->sin_addr = sc->gre_oip.ip_dst;
+				break;
 #endif
-		if ((sc->g_src.s_addr != INADDR_ANY) &&
-		    (sc->g_dst.s_addr != INADDR_ANY)) {
-			bzero(&sp, sizeof(sp));
-			bzero(&sm, sizeof(sm));
-			bzero(&dp, sizeof(dp));
-			bzero(&dm, sizeof(dm));
-			sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
-			    sizeof(struct sockaddr_in);
-			sp.sin_family = sm.sin_family = dp.sin_family =
-			    dm.sin_family = AF_INET;
-			sp.sin_addr = sc->g_src;
-			dp.sin_addr = sc->g_dst;
-			sm.sin_addr.s_addr = dm.sin_addr.s_addr =
-			    INADDR_BROADCAST;
-#ifdef INET
-			sc->encap = encap_attach(AF_INET, sc->g_proto,
-			    sintosa(&sp), sintosa(&sm), sintosa(&dp),
-			    sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
-				&in_gre_protosw : &in_mobile_protosw, sc);
-			if (sc->encap == NULL)
-				printf("%s: unable to attach encap\n",
-				    if_name(GRE2IFP(sc)));
+#ifdef INET6
+			case SIOCGIFPSRCADDR_IN6:
+				sin6->sin6_addr = sc->gre_oip6.ip6_src;
+				break;
+			case SIOCGIFPDSTADDR_IN6:
+				sin6->sin6_addr = sc->gre_oip6.ip6_dst;
+				break;
 #endif
-			if (sc->route.ro_rt != 0) /* free old route */
-				RTFREE(sc->route.ro_rt);
-			if (gre_compute_route(sc) == 0)
-				ifp->if_drv_flags |= IFF_DRV_RUNNING;
-			else
-				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+			}
 		}
-		break;
-	case GREGADDRS:
-		memset(&si, 0, sizeof(si));
-		si.sin_family = AF_INET;
-		si.sin_len = sizeof(struct sockaddr_in);
-		si.sin_addr.s_addr = sc->g_src.s_addr;
-		sa = sintosa(&si);
-		error = prison_if(curthread->td_ucred, sa);
+		GRE_RUNLOCK(sc);
 		if (error != 0)
 			break;
-		ifr->ifr_addr = *sa;
-		break;
-	case GREGADDRD:
-		memset(&si, 0, sizeof(si));
-		si.sin_family = AF_INET;
-		si.sin_len = sizeof(struct sockaddr_in);
-		si.sin_addr.s_addr = sc->g_dst.s_addr;
-		sa = sintosa(&si);
-		error = prison_if(curthread->td_ucred, sa);
-		if (error != 0)
+		switch (cmd) {
+#ifdef INET
+		case SIOCGIFPSRCADDR:
+		case SIOCGIFPDSTADDR:
+			error = prison_if(curthread->td_ucred,
+			    (struct sockaddr *)sin);
+			if (error != 0)
+				memset(sin, 0, sizeof(*sin));
 			break;
-		ifr->ifr_addr = *sa;
+#endif
+#ifdef INET6
+		case SIOCGIFPSRCADDR_IN6:
+		case SIOCGIFPDSTADDR_IN6:
+			error = prison_if(curthread->td_ucred,
+			    (struct sockaddr *)sin6);
+			if (error == 0)
+				error = sa6_recoverscope(sin6);
+			if (error != 0)
+				memset(sin6, 0, sizeof(*sin6));
+#endif
+		}
 		break;
-	case SIOCSIFPHYADDR:
-		/*
-		 * XXXRW: Isn't this priv_check() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
+	case SIOCGTUNFIB:
+		ifr->ifr_fib = sc->gre_fibnum;
+		break;
+	case SIOCSTUNFIB:
+		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
-		if (aifr->ifra_addr.sin_family != AF_INET ||
-		    aifr->ifra_dstaddr.sin_family != AF_INET) {
-			error = EAFNOSUPPORT;
-			break;
-		}
-		if (aifr->ifra_addr.sin_len != sizeof(si) ||
-		    aifr->ifra_dstaddr.sin_len != sizeof(si)) {
+		if (ifr->ifr_fib >= rt_numfibs)
 			error = EINVAL;
+		else
+			sc->gre_fibnum = ifr->ifr_fib;
+		break;
+	case GRESKEY:
+		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
-		}
-		sc->g_src = aifr->ifra_addr.sin_addr;
-		sc->g_dst = aifr->ifra_dstaddr.sin_addr;
-		goto recompute;
-	case SIOCSLIFPHYADDR:
-		/*
-		 * XXXRW: Isn't this priv_check() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
+		if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
 			break;
-		if (lifr->addr.ss_family != AF_INET ||
-		    lifr->dstaddr.ss_family != AF_INET) {
-			error = EAFNOSUPPORT;
-			break;
+		if (sc->gre_key != opt) {
+			GRE_WLOCK(sc);
+			sc->gre_key = opt;
+			gre_updatehdr(sc);
+			GRE_WUNLOCK(sc);
 		}
-		if (lifr->addr.ss_len != sizeof(si) ||
-		    lifr->dstaddr.ss_len != sizeof(si)) {
-			error = EINVAL;
-			break;
-		}
-		sc->g_src = (satosin(&lifr->addr))->sin_addr;
-		sc->g_dst =
-		    (satosin(&lifr->dstaddr))->sin_addr;
-		goto recompute;
-	case SIOCDIFPHYADDR:
-		/*
-		 * XXXRW: Isn't this priv_check() redundant to the ifnet
-		 * layer check?
-		 */
-		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
-			break;
-		sc->g_src.s_addr = INADDR_ANY;
-		sc->g_dst.s_addr = INADDR_ANY;
-		goto recompute;
-	case SIOCGLIFPHYADDR:
-		if (sc->g_src.s_addr == INADDR_ANY ||
-		    sc->g_dst.s_addr == INADDR_ANY) {
-			error = EADDRNOTAVAIL;
-			break;
-		}
-		memset(&si, 0, sizeof(si));
-		si.sin_family = AF_INET;
-		si.sin_len = sizeof(struct sockaddr_in);
-		si.sin_addr.s_addr = sc->g_src.s_addr;
-		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
-		if (error != 0)
-			break;
-		memcpy(&lifr->addr, &si, sizeof(si));
-		si.sin_addr.s_addr = sc->g_dst.s_addr;
-		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
-		if (error != 0)
-			break;
-		memcpy(&lifr->dstaddr, &si, sizeof(si));
 		break;
-	case SIOCGIFPSRCADDR:
-#ifdef INET6
-	case SIOCGIFPSRCADDR_IN6:
-#endif
-		if (sc->g_src.s_addr == INADDR_ANY) {
-			error = EADDRNOTAVAIL;
-			break;
-		}
-		memset(&si, 0, sizeof(si));
-		si.sin_family = AF_INET;
-		si.sin_len = sizeof(struct sockaddr_in);
-		si.sin_addr.s_addr = sc->g_src.s_addr;
-		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
-		if (error != 0)
-			break;
-		bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+	case GREGKEY:
+		error = copyout(&sc->gre_key, ifr->ifr_data,
+		    sizeof(sc->gre_key));
 		break;
-	case SIOCGIFPDSTADDR:
-#ifdef INET6
-	case SIOCGIFPDSTADDR_IN6:
-#endif
-		if (sc->g_dst.s_addr == INADDR_ANY) {
-			error = EADDRNOTAVAIL;
+	case GRESOPTS:
+		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
-		}
-		memset(&si, 0, sizeof(si));
-		si.sin_family = AF_INET;
-		si.sin_len = sizeof(struct sockaddr_in);
-		si.sin_addr.s_addr = sc->g_dst.s_addr;
-		error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
-		if (error != 0)
+		if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
 			break;
-		bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
-		break;
-	case GRESKEY:
-		error = priv_check(curthread, PRIV_NET_GRE);
-		if (error)
-			break;
-		error = copyin(ifr->ifr_data, &key, sizeof(key));
-		if (error)
-			break;
-		/* adjust MTU for option header */
-		if (key == 0 && sc->key != 0)		/* clear */
-			adj += sizeof(key);
-		else if (key != 0 && sc->key == 0)	/* set */
-			adj -= sizeof(key);
-
-		if (ifp->if_mtu + adj < 576) {
+		if (opt & ~GRE_OPTMASK)
 			error = EINVAL;
-			break;
+		else {
+			if (sc->gre_options != opt) {
+				GRE_WLOCK(sc);
+				sc->gre_options = opt;
+				gre_updatehdr(sc);
+				GRE_WUNLOCK(sc);
+			}
 		}
-		ifp->if_mtu += adj;
-		sc->key = key;
 		break;
-	case GREGKEY:
-		error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
+
+	case GREGOPTS:
+		error = copyout(&sc->gre_options, ifr->ifr_data,
+		    sizeof(sc->gre_options));
 		break;
-
 	default:
 		error = EINVAL;
 		break;
 	}
-
-	splx(s);
+end:
+	sx_xunlock(&gre_ioctl_sx);
 	return (error);
 }
 
-/*
- * computes a route to our destination that is not the one
- * which would be taken by ip_output(), as this one will loop back to
- * us. If the interface is p2p as  a--->b, then a routing entry exists
- * If we now send a packet to b (e.g. ping b), this will come down here
- * gets src=a, dst=b tacked on and would from ip_output() sent back to
- * if_gre.
- * Goal here is to compute a route to b that is less specific than
- * a-->b. We know that this one exists as in normal operation we have
- * at least a default route which matches.
- */
+static void
+gre_updatehdr(struct gre_softc *sc)
+{
+	struct grehdr *gh = NULL;
+	uint32_t *opts;
+	uint16_t flags;
+
+	GRE_WLOCK_ASSERT(sc);
+	switch (sc->gre_family) {
+#ifdef INET
+	case AF_INET:
+		sc->gre_hlen = sizeof(struct greip);
+		sc->gre_oip.ip_v = IPPROTO_IPV4;
+		sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+		sc->gre_oip.ip_p = IPPROTO_GRE;
+		gh = &sc->gre_gihdr->gi_gre;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		sc->gre_hlen = sizeof(struct greip6);
+		sc->gre_oip6.ip6_vfc = IPV6_VERSION;
+		sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
+		gh = &sc->gre_gi6hdr->gi6_gre;
+		break;
+#endif
+	default:
+		return;
+	}
+	flags = 0;
+	opts = gh->gre_opts;
+	if (sc->gre_options & GRE_ENABLE_CSUM) {
+		flags |= GRE_FLAGS_CP;
+		sc->gre_hlen += 2 * sizeof(uint16_t);
+		*opts++ = 0;
+	}
+	if (sc->gre_key != 0) {
+		flags |= GRE_FLAGS_KP;
+		sc->gre_hlen += sizeof(uint32_t);
+		*opts++ = htonl(sc->gre_key);
+	}
+	if (sc->gre_options & GRE_ENABLE_SEQ) {
+		flags |= GRE_FLAGS_SP;
+		sc->gre_hlen += sizeof(uint32_t);
+		*opts++ = 0;
+	} else
+		sc->gre_oseq = 0;
+	gh->gre_flags = htons(flags);
+	GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen;
+}
+
+static void
+gre_detach(struct gre_softc *sc)
+{
+
+	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+	if (sc->gre_ecookie != NULL)
+		encap_detach(sc->gre_ecookie);
+	sc->gre_ecookie = NULL;
+}
+
 static int
-gre_compute_route(struct gre_softc *sc)
+gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
+    struct sockaddr *dst)
 {
-	struct route *ro;
+	struct gre_softc *sc, *tsc;
+#ifdef INET6
+	struct ip6_hdr *ip6;
+#endif
+#ifdef INET
+	struct ip *ip;
+#endif
+	void *hdr;
+	int error;
 
-	ro = &sc->route;
+	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+	GRE_LIST_LOCK();
+	sc = ifp->if_softc;
+	LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
+		if (tsc == sc || tsc->gre_family != src->sa_family)
+			continue;
+#ifdef INET
+		if (tsc->gre_family == AF_INET &&
+		    tsc->gre_oip.ip_src.s_addr ==
+		    satosin(src)->sin_addr.s_addr &&
+		    tsc->gre_oip.ip_dst.s_addr ==
+		    satosin(dst)->sin_addr.s_addr) {
+			GRE_LIST_UNLOCK();
+			return (EADDRNOTAVAIL);
+		}
+#endif
+#ifdef INET6
+		if (tsc->gre_family == AF_INET6 &&
+		    IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
+		    &satosin6(src)->sin6_addr) &&
+		    IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
+			&satosin6(dst)->sin6_addr)) {
+			GRE_LIST_UNLOCK();
+			return (EADDRNOTAVAIL);
+		}
+#endif
+	}
+	GRE_LIST_UNLOCK();
 
-	memset(ro, 0, sizeof(struct route));
-	((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
-	ro->ro_dst.sa_family = AF_INET;
-	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+	error = 0;
+	switch (src->sa_family) {
+#ifdef INET
+	case AF_INET:
+		hdr = ip = malloc(sizeof(struct greip) +
+		    3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+		ip->ip_src = satosin(src)->sin_addr;
+		ip->ip_dst = satosin(dst)->sin_addr;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		hdr = ip6 = malloc(sizeof(struct greip6) +
+		    3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+		ip6->ip6_src = satosin6(src)->sin6_addr;
+		ip6->ip6_dst = satosin6(dst)->sin6_addr;
+		break;
+#endif
+	default:
+		return (EAFNOSUPPORT);
+	}
+	if (sc->gre_family != 0)
+		gre_detach(sc);
+	GRE_WLOCK(sc);
+	if (sc->gre_family != 0)
+		free(sc->gre_hdr, M_GRE);
+	sc->gre_family = src->sa_family;
+	sc->gre_hdr = hdr;
+	sc->gre_oseq = 0;
+	sc->gre_iseq = UINT32_MAX;
+	gre_updatehdr(sc);
+	GRE_WUNLOCK(sc);
 
-	/*
-	 * toggle last bit, so our interface is not found, but a less
-	 * specific route. I'd rather like to specify a shorter mask,
-	 * but this is not possible. Should work though. XXX
-	 * XXX MRT Use a different FIB for the tunnel to solve this problem.
-	 */
-	if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
-		((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
-		    htonl(0x01);
+	switch (src->sa_family) {
+#ifdef INET
+	case AF_INET:
+		error = in_gre_attach(sc);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		error = in6_gre_attach(sc);
+		break;
+#endif
 	}
+	if (error == 0) {
+		ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		if_link_state_change(ifp, LINK_STATE_UP);
+	}
+	return (error);
+}
 
-#ifdef DIAGNOSTIC
-	printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
-	    inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
+static void
+gre_delete_tunnel(struct ifnet *ifp)
+{
+	struct gre_softc *sc = ifp->if_softc;
+	int family;
+
+	GRE_WLOCK(sc);
+	family = sc->gre_family;
+	sc->gre_family = 0;
+	GRE_WUNLOCK(sc);
+	if (family != 0) {
+		gre_detach(sc);
+		free(sc->gre_hdr, M_GRE);
+	}
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+	if_link_state_change(ifp, LINK_STATE_DOWN);
+}
+
+int
+gre_input(struct mbuf **mp, int *offp, int proto)
+{
+	struct gre_softc *sc;
+	struct grehdr *gh;
+	struct ifnet *ifp;
+	struct mbuf *m;
+	uint32_t *opts;
+#ifdef notyet
+    uint32_t key;
 #endif
+	uint16_t flags;
+	int hlen, isr, af;
 
-	rtalloc_fib(ro, sc->gre_fibnum);
+	m = *mp;
+	sc = encap_getarg(m);
+	KASSERT(sc != NULL, ("encap_getarg returned NULL"));
 
-	/*
-	 * check if this returned a route at all and this route is no
-	 * recursion to ourself
-	 */
-	if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
-#ifdef DIAGNOSTIC
-		if (ro->ro_rt == NULL)
-			printf(" - no route found!\n");
-		else
-			printf(" - route loops back to ourself!\n");
+	ifp = GRE2IFP(sc);
+	hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
+	if (m->m_pkthdr.len < hlen)
+		goto drop;
+	if (m->m_len < hlen) {
+		m = m_pullup(m, hlen);
+		if (m == NULL)
+			goto drop;
+	}
+	gh = (struct grehdr *)mtodo(m, *offp);
+	flags = ntohs(gh->gre_flags);
+	if (flags & ~GRE_FLAGS_MASK)
+		goto drop;
+	opts = gh->gre_opts;
+	hlen = 2 * sizeof(uint16_t);
+	if (flags & GRE_FLAGS_CP) {
+		/* reserved1 field must be zero */
+		if (((uint16_t *)opts)[1] != 0)
+			goto drop;
+		if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+			goto drop;
+		hlen += 2 * sizeof(uint16_t);
+		opts++;
+	}
+	if (flags & GRE_FLAGS_KP) {
+#ifdef notyet
+        /*
+         * XXX: The current implementation uses the key only for outgoing
+         * packets. But we can check the key value here, or even in the
+         * encapcheck function.
+         */
+		key = ntohl(*opts);
 #endif
-		return EADDRNOTAVAIL;
+		hlen += sizeof(uint32_t);
+		opts++;
+    }
+#ifdef notyet
+	} else
+		key = 0;
+	if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
+		goto drop;
+#endif
+	if (flags & GRE_FLAGS_SP) {
+#ifdef notyet
+		seq = ntohl(*opts);
+#endif
+		hlen += sizeof(uint32_t);
 	}
+	switch (ntohs(gh->gre_proto)) {
+	case ETHERTYPE_WCCP:
+		/*
+		 * For WCCP skip an additional 4 bytes if after GRE header
+		 * doesn't follow an IP header.
+		 */
+		if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
+			hlen += sizeof(uint32_t);
+		/* FALLTHROUGH */
+	case ETHERTYPE_IP:
+		isr = NETISR_IP;
+		af = AF_INET;
+		break;
+	case ETHERTYPE_IPV6:
+		isr = NETISR_IPV6;
+		af = AF_INET6;
+		break;
+	default:
+		goto drop;
+	}
+	m_adj(m, *offp + hlen);
+	m_clrprotoflags(m);
+	m->m_pkthdr.rcvif = ifp;
+	M_SETFIB(m, ifp->if_fib);
+#ifdef MAC
+	mac_ifnet_create_mbuf(ifp, m);
+#endif
+	BPF_MTAP2(ifp, &af, sizeof(af), m);
+	ifp->if_ipackets++;
+	ifp->if_ibytes += m->m_pkthdr.len;
+	if ((ifp->if_flags & IFF_MONITOR) != 0)
+		m_freem(m);
+	else
+		netisr_dispatch(isr, m);
+	return (IPPROTO_DONE);
+drop:
+	ifp->if_ierrors++;
+	m_freem(m);
+	return (IPPROTO_DONE);
+}
 
-	/*
-	 * now change it back - else ip_output will just drop
-	 * the route and search one to this interface ...
-	 */
-	if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
-		((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+#define	MTAG_GRE	1307983903
+static int
+gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+	struct m_tag *mtag;
+	int count;
 
-#ifdef DIAGNOSTIC
-	printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
-	    inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
-	printf("\n");
+	count = 1;
+	mtag = NULL;
+	while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
+		if (*(struct ifnet **)(mtag + 1) == ifp) {
+			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+			return (EIO);
+		}
+		count++;
+	}
+	if (count > V_max_gre_nesting) {
+		log(LOG_NOTICE,
+		    "%s: if_output recursively called too many times(%d)\n",
+		    ifp->if_xname, count);
+		return (EIO);
+	}
+	mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
+	if (mtag == NULL)
+		return (ENOMEM);
+	*(struct ifnet **)(mtag + 1) = ifp;
+	m_tag_prepend(m, mtag);
+	return (0);
+}
+
+static int
+gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+   struct route *ro)
+{
+	uint32_t af;
+	int error;
+
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m);
+	if (error != 0)
+		goto drop;
 #endif
+	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+	    (ifp->if_flags & IFF_UP) == 0) {
+		error = ENETDOWN;
+		goto drop;
+	}
 
-	return 0;
+	error = gre_check_nesting(ifp, m);
+	if (error != 0)
+		goto drop;
+
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	if (dst->sa_family == AF_UNSPEC)
+		bcopy(dst->sa_data, &af, sizeof(af));
+	else
+		af = dst->sa_family;
+	BPF_MTAP2(ifp, &af, sizeof(af), m);
+	m->m_pkthdr.csum_data = af;	/* save af for if_transmit */
+	return (ifp->if_transmit(ifp, m));
+drop:
+	m_freem(m);
+	ifp->if_oerrors++;
+	return (error);
 }
 
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+static void
+gre_setseqn(struct grehdr *gh, uint32_t seq)
 {
-	u_int32_t sum = 0;
-	int nwords = len >> 1;
+	uint32_t *opts;
+	uint16_t flags;
 
-	while (nwords-- != 0)
-		sum += *p++;
+	opts = gh->gre_opts;
+	flags = ntohs(gh->gre_flags);
+	KASSERT((flags & GRE_FLAGS_SP) != 0,
+	    ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
+	if (flags & GRE_FLAGS_CP)
+		opts++;
+	if (flags & GRE_FLAGS_KP)
+		opts++;
+	*opts = htonl(seq);
+}
 
-	if (len & 1) {
-		union {
-			u_short w;
-			u_char c[2];
-		} u;
-		u.c[0] = *(u_char *)p;
-		u.c[1] = 0;
-		sum += u.w;
+static int
+gre_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	GRE_RLOCK_TRACKER;
+	struct gre_softc *sc;
+	struct grehdr *gh;
+	uint32_t iaf, oaf, oseq;
+	int error, hlen, olen, plen;
+	int want_seq, want_csum;
+
+	plen = 0;
+	sc = ifp->if_softc;
+	if (sc == NULL) {
+		error = ENETDOWN;
+		m_freem(m);
+		goto drop;
 	}
+	GRE_RLOCK(sc);
+	if (sc->gre_family == 0) {
+		GRE_RUNLOCK(sc);
+		error = ENETDOWN;
+		m_freem(m);
+		goto drop;
+	}
+	iaf = m->m_pkthdr.csum_data;
+	oaf = sc->gre_family;
+	hlen = sc->gre_hlen;
+	want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
+	if (want_seq)
+		oseq = sc->gre_oseq++;
+	else
+		oseq = 0;	/* Make compiler happy. */
+	want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
+	M_SETFIB(m, sc->gre_fibnum);
+	M_PREPEND(m, hlen, M_NOWAIT);
+	if (m == NULL) {
+		GRE_RUNLOCK(sc);
+		error = ENOBUFS;
+		goto drop;
+	}
+	bcopy(sc->gre_hdr, mtod(m, void *), hlen);
+	GRE_RUNLOCK(sc);
+	switch (oaf) {
+#ifdef INET
+	case AF_INET:
+		olen = sizeof(struct ip);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		olen = sizeof(struct ip6_hdr);
+		break;
+#endif
+	default:
+		error = ENETDOWN;
+		goto drop;
+	}
+	gh = (struct grehdr *)mtodo(m, olen);
+	switch (iaf) {
+#ifdef INET
+	case AF_INET:
+		gh->gre_proto = htons(ETHERTYPE_IP);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		gh->gre_proto = htons(ETHERTYPE_IPV6);
+		break;
+#endif
+	default:
+		error = ENETDOWN;
+		goto drop;
+	}
+	if (want_seq)
+		gre_setseqn(gh, oseq);
+	if (want_csum) {
+		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
+		    m->m_pkthdr.len, olen);
+	}
+	plen = m->m_pkthdr.len - hlen;
+	switch (oaf) {
+#ifdef INET
+	case AF_INET:
+		error = in_gre_output(m, iaf, hlen);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		error = in6_gre_output(m, iaf, hlen);
+		break;
+#endif
+	default:
+		m_freem(m);
+		error = ENETDOWN;
+	};
+drop:
+	if (error)
+		ifp->if_oerrors++;
+	else {
+		ifp->if_opackets++;
+		ifp->if_obytes += plen;
+	}
+	return (error);
+}
 
-	/* end-around-carry */
-	sum = (sum >> 16) + (sum & 0xffff);
-	sum += (sum >> 16);
-	return (~sum);
+static void
+gre_qflush(struct ifnet *ifp __unused)
+{
+
 }
 
 static int
@@ -959,16 +983,12 @@
 
 	switch (type) {
 	case MOD_LOAD:
-		greattach();
-		break;
 	case MOD_UNLOAD:
-		if_clone_detach(&gre_cloner);
-		mtx_destroy(&gre_mtx);
 		break;
 	default:
-		return EOPNOTSUPP;
+		return (EOPNOTSUPP);
 	}
-	return 0;
+	return (0);
 }
 
 static moduledata_t gre_mod = {

Modified: trunk/sys/net/if_gre.h
===================================================================
--- trunk/sys/net/if_gre.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gre.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,9 +1,7 @@
 /* $MidnightBSD$ */
-/*	$NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
-/*	 $FreeBSD: stable/9/sys/net/if_gre.h 223223 2011-06-18 09:34:03Z bz $ */
-
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
  * All rights reserved
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -29,143 +27,97 @@
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $
+ * $FreeBSD: stable/10/sys/net/if_gre.h 284072 2015-06-06 13:26:13Z ae $
  */
 
-#ifndef _NET_IF_GRE_H
-#define _NET_IF_GRE_H
+#ifndef _NET_IF_GRE_H_
+#define _NET_IF_GRE_H_
 
-#include <sys/ioccom.h>
 #ifdef _KERNEL
-#include <sys/queue.h>
-
-/*
- * Version of the WCCP, need to be configured manually since
- * header for version 2 is the same but IP payload is prepended
- * with additional 4-bytes field.
- */
-typedef enum {
-	WCCP_V1 = 0,
-	WCCP_V2
-} wccp_ver_t;
-
-struct gre_softc {
-	struct ifnet *sc_ifp;
-	LIST_ENTRY(gre_softc) sc_list;
-	int gre_unit;
-	int gre_flags;
-	u_int	gre_fibnum;	/* use this fib for envelopes */
-	struct in_addr g_src;	/* source address of gre packets */
-	struct in_addr g_dst;	/* destination address of gre packets */
-	struct route route;	/* routing entry that determines, where a
-				   encapsulated packet should go */
-	u_char g_proto;		/* protocol of encapsulator */
-
-	const struct encaptab *encap;	/* encapsulation cookie */
-
-	uint32_t key;		/* key included in outgoing GRE packets */
-				/* zero means none */
-
-	wccp_ver_t wccp_ver;	/* version of the WCCP */
-};
-#define	GRE2IFP(sc)	((sc)->sc_ifp)
-
-
-struct gre_h {
-	u_int16_t flags;	/* GRE flags */
-	u_int16_t ptype;	/* protocol type of payload typically
-				   Ether protocol type*/
-	uint32_t options[0];	/* optional options */
-/*
- *  from here on: fields are optional, presence indicated by flags
- *
-	u_int_16 checksum	checksum (one-complements of GRE header
-				and payload
-				Present if (ck_pres | rt_pres == 1).
-				Valid if (ck_pres == 1).
-	u_int_16 offset		offset from start of routing filed to
-				first octet of active SRE (see below).
-				Present if (ck_pres | rt_pres == 1).
-				Valid if (rt_pres == 1).
-	u_int_32 key		inserted by encapsulator e.g. for
-				authentication
-				Present if (key_pres ==1 ).
-	u_int_32 seq_num	Sequence number to allow for packet order
-				Present if (seq_pres ==1 ).
-	struct gre_sre[] routing Routing fileds (see below)
-				Present if (rt_pres == 1)
- */
+/* GRE header according to RFC 2784 and RFC 2890 */
+struct grehdr {
+	uint16_t	gre_flags;	/* GRE flags */
+#define	GRE_FLAGS_CP	0x8000		/* checksum present */
+#define	GRE_FLAGS_KP	0x2000		/* key present */
+#define	GRE_FLAGS_SP	0x1000		/* sequence present */
+#define	GRE_FLAGS_MASK	(GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP)
+	uint16_t	gre_proto;	/* protocol type */
+	uint32_t	gre_opts[0];	/* optional fields */
 } __packed;
 
+#ifdef INET
 struct greip {
-	struct ip gi_i;
-	struct gre_h  gi_g;
+	struct ip	gi_ip;
+	struct grehdr	gi_gre;
 } __packed;
+#endif
 
-#define gi_pr		gi_i.ip_p
-#define gi_len		gi_i.ip_len
-#define gi_src		gi_i.ip_src
-#define gi_dst		gi_i.ip_dst
-#define gi_ptype	gi_g.ptype
-#define gi_flags	gi_g.flags
-#define gi_options	gi_g.options
+#ifdef INET6
+struct greip6 {
+	struct ip6_hdr	gi6_ip6;
+	struct grehdr	gi6_gre;
+} __packed;
+#endif
 
-#define GRE_CP		0x8000  /* Checksum Present */
-#define GRE_RP		0x4000  /* Routing Present */
-#define GRE_KP		0x2000  /* Key Present */
-#define GRE_SP		0x1000  /* Sequence Present */
-#define GRE_SS		0x0800	/* Strict Source Route */
+struct gre_softc {
+	struct ifnet		*gre_ifp;
+	LIST_ENTRY(gre_softc)	gre_list;
+	struct rmlock		gre_lock;
+	int			gre_family;	/* AF of delivery header */
+	uint32_t		gre_iseq;
+	uint32_t		gre_oseq;
+	uint32_t		gre_key;
+	uint32_t		gre_options;
+	uint32_t		gre_mtu;
+	u_int			gre_fibnum;
+	u_int			gre_hlen;	/* header size */
+	union {
+		void		*hdr;
+#ifdef INET
+		struct greip	*gihdr;
+#endif
+#ifdef INET6
+		struct greip6	*gi6hdr;
+#endif
+	} gre_uhdr;
+	const struct encaptab	*gre_ecookie;
+};
+#define	GRE2IFP(sc)		((sc)->gre_ifp)
+#define	GRE_LOCK_INIT(sc)	rm_init(&(sc)->gre_lock, "gre softc")
+#define	GRE_LOCK_DESTROY(sc)	rm_destroy(&(sc)->gre_lock)
+#define	GRE_RLOCK_TRACKER	struct rm_priotracker gre_tracker
+#define	GRE_RLOCK(sc)		rm_rlock(&(sc)->gre_lock, &gre_tracker)
+#define	GRE_RUNLOCK(sc)		rm_runlock(&(sc)->gre_lock, &gre_tracker)
+#define	GRE_RLOCK_ASSERT(sc)	rm_assert(&(sc)->gre_lock, RA_RLOCKED)
+#define	GRE_WLOCK(sc)		rm_wlock(&(sc)->gre_lock)
+#define	GRE_WUNLOCK(sc)		rm_wunlock(&(sc)->gre_lock)
+#define	GRE_WLOCK_ASSERT(sc)	rm_assert(&(sc)->gre_lock, RA_WLOCKED)
 
+#define	gre_hdr			gre_uhdr.hdr
+#define	gre_gihdr		gre_uhdr.gihdr
+#define	gre_gi6hdr		gre_uhdr.gi6hdr
+#define	gre_oip			gre_gihdr->gi_ip
+#define	gre_oip6		gre_gi6hdr->gi6_ip6
+
+int	gre_input(struct mbuf **, int *, int);
+#ifdef INET
+int	in_gre_attach(struct gre_softc *);
+int	in_gre_output(struct mbuf *, int, int);
+#endif
+#ifdef INET6
+int	in6_gre_attach(struct gre_softc *);
+int	in6_gre_output(struct mbuf *, int, int);
+#endif
 /*
  * CISCO uses special type for GRE tunnel created as part of WCCP
  * connection, while in fact those packets are just IPv4 encapsulated
  * into GRE.
  */
-#define WCCP_PROTOCOL_TYPE	0x883E
-
-/*
- * gre_sre defines a Source route Entry. These are needed if packets
- * should be routed over more than one tunnel hop by hop
- */
-struct gre_sre {
-	u_int16_t sre_family;	/* address family */
-	u_char	sre_offset;	/* offset to first octet of active entry */
-	u_char	sre_length;	/* number of octets in the SRE.
-				   sre_lengthl==0 -> last entry. */
-	u_char	*sre_rtinfo;	/* the routing information */
-};
-
-struct greioctl {
-	int unit;
-	struct in_addr addr;
-};
-
-/* for mobile encaps */
-
-struct mobile_h {
-	u_int16_t proto;		/* protocol and S-bit */
-	u_int16_t hcrc;			/* header checksum */
-	u_int32_t odst;			/* original destination address */
-	u_int32_t osrc;			/* original source addr, if S-bit set */
-} __packed;
-
-struct mobip_h {
-	struct ip	mi;
-	struct mobile_h	mh;
-} __packed;
-
-
-#define MOB_H_SIZ_S		(sizeof(struct mobile_h) - sizeof(u_int32_t))
-#define MOB_H_SIZ_L		(sizeof(struct mobile_h))
-#define MOB_H_SBIT	0x0080
-
-#define	GRE_TTL	30
-
+#define ETHERTYPE_WCCP		0x883E
 #endif /* _KERNEL */
 
-/*
- * ioctls needed to manipulate the interface
- */
-
 #define GRESADDRS	_IOW('i', 101, struct ifreq)
 #define GRESADDRD	_IOW('i', 102, struct ifreq)
 #define GREGADDRS	_IOWR('i', 103, struct ifreq)
@@ -172,15 +124,14 @@
 #define GREGADDRD	_IOWR('i', 104, struct ifreq)
 #define GRESPROTO	_IOW('i' , 105, struct ifreq)
 #define GREGPROTO	_IOWR('i', 106, struct ifreq)
-#define GREGKEY		_IOWR('i', 107, struct ifreq)
-#define GRESKEY		_IOW('i', 108, struct ifreq)
 
-#ifdef _KERNEL
-LIST_HEAD(gre_softc_head, gre_softc);
-extern struct mtx gre_mtx;
-extern struct gre_softc_head gre_softc_list;
+#define	GREGKEY		_IOWR('i', 107, struct ifreq)
+#define	GRESKEY		_IOW('i', 108, struct ifreq)
+#define	GREGOPTS	_IOWR('i', 109, struct ifreq)
+#define	GRESOPTS	_IOW('i', 110, struct ifreq)
 
-u_int16_t	gre_in_cksum(u_int16_t *, u_int);
-#endif /* _KERNEL */
+#define	GRE_ENABLE_CSUM		0x0001
+#define	GRE_ENABLE_SEQ		0x0002
+#define	GRE_OPTMASK		(GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
 
-#endif
+#endif /* _NET_IF_GRE_H_ */

Modified: trunk/sys/net/if_iso88025subr.c
===================================================================
--- trunk/sys/net/if_iso88025subr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_iso88025subr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -31,7 +31,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_iso88025subr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_iso88025subr.c 332160 2018-04-07 00:04:28Z brooks $
  *
  */
 
@@ -201,13 +201,9 @@
                 }
                 break;
 
-        case SIOCGIFADDR: {
-                        struct sockaddr *sa;
-
-                        sa = (struct sockaddr *) & ifr->ifr_data;
-                        bcopy(IF_LLADDR(ifp),
-                              (caddr_t) sa->sa_data, ISO88025_ADDR_LEN);
-                }
+        case SIOCGIFADDR:
+		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+		    ISO88025_ADDR_LEN);
                 break;
 
         case SIOCSIFMTU:
@@ -232,11 +228,8 @@
  * ISO88025 encapsulation
  */
 int
-iso88025_output(ifp, m, dst, ro)
-	struct ifnet *ifp;
-	struct mbuf *m;
-	struct sockaddr *dst;
-	struct route *ro;
+iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+	struct route *ro)
 {
 	u_int16_t snap_type = 0;
 	int loop_copy = 0, error = 0, rif_len = 0;
@@ -339,7 +332,7 @@
 		bcopy((caddr_t)&(satoipx_addr(dst).x_host), (caddr_t)edst,
 		      ISO88025_ADDR_LEN);
 
-		M_PREPEND(m, 3, M_WAIT);
+		M_PREPEND(m, 3, M_WAITOK);
 		m = m_pullup(m, 3);
 		if (m == 0)
 			senderr(ENOBUFS);
@@ -352,7 +345,7 @@
 #endif	/* IPX */
 	case AF_UNSPEC:
 	{
-		struct iso88025_sockaddr_data *sd;
+		const struct iso88025_sockaddr_data *sd;
 		/*
 		 * For AF_UNSPEC sockaddr.sa_data must contain all of the
 		 * mac information needed to send the packet.  This allows
@@ -362,13 +355,12 @@
 		 * should be an iso88025_sockaddr_data structure see iso88025.h
 		 */
                 loop_copy = -1;
-		sd = (struct iso88025_sockaddr_data *)dst->sa_data;
+		sd = (const struct iso88025_sockaddr_data *)dst->sa_data;
 		gen_th.ac = sd->ac;
 		gen_th.fc = sd->fc;
-		(void)memcpy((caddr_t)edst, (caddr_t)sd->ether_dhost,
-			     ISO88025_ADDR_LEN);
-		(void)memcpy((caddr_t)gen_th.iso88025_shost,
-			     (caddr_t)sd->ether_shost, ISO88025_ADDR_LEN);
+		(void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN);
+		(void)memcpy(gen_th.iso88025_shost, sd->ether_shost,
+		    ISO88025_ADDR_LEN);
 		rif_len = 0;
 		break;
 	}
@@ -383,7 +375,7 @@
 	 */
 	if (snap_type != 0) {
         	struct llc *l;
-		M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+		M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
 		if (m == 0)
 			senderr(ENOBUFS);
 		l = mtod(m, struct llc *);
@@ -399,7 +391,7 @@
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
-	M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_DONTWAIT);
+	M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT);
 	if (m == 0)
 		senderr(ENOBUFS);
 	th = mtod(m, struct iso88025_header *);
@@ -481,7 +473,6 @@
 		goto dropanyway;
 	}
 	th = mtod(m, struct iso88025_header *);
-	m->m_pkthdr.header = (void *)th;
 
 	/*
 	 * Discard packet if interface is not up.

Modified: trunk/sys/net/if_lagg.c
===================================================================
--- trunk/sys/net/if_lagg.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_lagg.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -19,7 +19,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_lagg.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_lagg.c 313112 2017-02-02 23:04:01Z asomers $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -38,7 +38,7 @@
 #include <sys/proc.h>
 #include <sys/hash.h>
 #include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 
@@ -52,18 +52,21 @@
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/bpf.h>
+#include <net/vnet.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
+#include <netinet/ip.h>
 #endif
 #ifdef INET
 #include <netinet/in_systm.h>
 #include <netinet/if_ether.h>
-#include <netinet/ip.h>
 #endif
 
 #ifdef INET6
 #include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/in6_ifattach.h>
 #endif
 
 #include <net/if_vlan_var.h>
@@ -80,12 +83,23 @@
 	{0, NULL}
 };
 
-SLIST_HEAD(__trhead, lagg_softc) lagg_list;	/* list of laggs */
-static struct mtx	lagg_list_mtx;
+VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
+#define	V_lagg_list	VNET(lagg_list)
+static VNET_DEFINE(struct mtx, lagg_list_mtx);
+#define	V_lagg_list_mtx	VNET(lagg_list_mtx)
+#define	LAGG_LIST_LOCK_INIT(x)		mtx_init(&V_lagg_list_mtx, \
+					"if_lagg list", NULL, MTX_DEF)
+#define	LAGG_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_lagg_list_mtx)
+#define	LAGG_LIST_LOCK(x)		mtx_lock(&V_lagg_list_mtx)
+#define	LAGG_LIST_UNLOCK(x)		mtx_unlock(&V_lagg_list_mtx)
 eventhandler_tag	lagg_detach_cookie = NULL;
 
 static int	lagg_clone_create(struct if_clone *, int, caddr_t);
 static void	lagg_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, lagg_cloner);
+#define	V_lagg_cloner	VNET(lagg_cloner)
+static const char laggname[] = "lagg";
+
 static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
 static void	lagg_capabilities(struct lagg_softc *);
 static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
@@ -97,7 +111,7 @@
 static void	lagg_port_state(struct ifnet *, int);
 static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
 static int	lagg_port_output(struct ifnet *, struct mbuf *,
-		    struct sockaddr *, struct route *);
+		    const struct sockaddr *, struct route *);
 static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
 #ifdef LAGG_PORT_STACKING
 static int	lagg_port_checkstacking(struct lagg_softc *);
@@ -119,10 +133,8 @@
 	    struct lagg_port *);
 static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
 
-IFC_SIMPLE_DECLARE(lagg, 0);
-
 /* Simple round robin */
-static int	lagg_rr_attach(struct lagg_softc *);
+static void	lagg_rr_attach(struct lagg_softc *);
 static int	lagg_rr_detach(struct lagg_softc *);
 static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
@@ -129,7 +141,7 @@
 		    struct mbuf *);
 
 /* Active failover */
-static int	lagg_fail_attach(struct lagg_softc *);
+static void	lagg_fail_attach(struct lagg_softc *);
 static int	lagg_fail_detach(struct lagg_softc *);
 static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
@@ -136,7 +148,7 @@
 		    struct mbuf *);
 
 /* Loadbalancing */
-static int	lagg_lb_attach(struct lagg_softc *);
+static void	lagg_lb_attach(struct lagg_softc *);
 static int	lagg_lb_detach(struct lagg_softc *);
 static int	lagg_lb_port_create(struct lagg_port *);
 static void	lagg_lb_port_destroy(struct lagg_port *);
@@ -146,7 +158,7 @@
 static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
 
 /* 802.3ad LACP */
-static int	lagg_lacp_attach(struct lagg_softc *);
+static void	lagg_lacp_attach(struct lagg_softc *);
 static int	lagg_lacp_detach(struct lagg_softc *);
 static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
@@ -153,10 +165,12 @@
 		    struct mbuf *);
 static void	lagg_lacp_lladdr(struct lagg_softc *);
 
+static void	lagg_callout(void *);
+
 /* lagg protocol table */
-static const struct {
-	int			ti_proto;
-	int			(*ti_attach)(struct lagg_softc *);
+static const struct lagg_proto {
+	lagg_proto	ti_proto;
+	void		(*ti_attach)(struct lagg_softc *);
 } lagg_protos[] = {
 	{ LAGG_PROTO_ROUNDROBIN,	lagg_rr_attach },
 	{ LAGG_PROTO_FAILOVER,		lagg_fail_attach },
@@ -167,19 +181,52 @@
 };
 
 SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
+SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
     "Link Aggregation");
 
-static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
-SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
-    &lagg_failover_rx_all, 0,
+/* Allow input on any failover links */
+static VNET_DEFINE(int, lagg_failover_rx_all);
+#define	V_lagg_failover_rx_all	VNET(lagg_failover_rx_all)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
+    &VNET_NAME(lagg_failover_rx_all), 0,
     "Accept input from any interface in a failover lagg");
-static int def_use_flowid = 1; /* Default value for using M_FLOWID */
-TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
-SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
-    &def_use_flowid, 0,
+
+/* Default value for using M_FLOWID */
+static VNET_DEFINE(int, def_use_flowid) = 1;
+#define	V_def_use_flowid	VNET(def_use_flowid)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
+    &VNET_NAME(def_use_flowid), 0,
     "Default setting for using flow id for load sharing");
 
+/* Default value for using M_FLOWID */
+static VNET_DEFINE(int, def_flowid_shift) = 16;
+#define	V_def_flowid_shift	VNET(def_flowid_shift)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
+    &VNET_NAME(def_flowid_shift), 0,
+    "Default setting for flowid shift for load sharing");
+
+static void
+vnet_lagg_init(const void *unused __unused)
+{
+
+	LAGG_LIST_LOCK_INIT();
+	SLIST_INIT(&V_lagg_list);
+	V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
+	    lagg_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_lagg_init, NULL);
+
+static void
+vnet_lagg_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_lagg_cloner);
+	LAGG_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_lagg_uninit, NULL);
+
 static int
 lagg_modevent(module_t mod, int type, void *data)
 {
@@ -186,9 +233,6 @@
 
 	switch (type) {
 	case MOD_LOAD:
-		mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
-		SLIST_INIT(&lagg_list);
-		if_clone_attach(&lagg_cloner);
 		lagg_input_p = lagg_input;
 		lagg_linkstate_p = lagg_port_state;
 		lagg_detach_cookie = EVENTHANDLER_REGISTER(
@@ -198,10 +242,8 @@
 	case MOD_UNLOAD:
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    lagg_detach_cookie);
-		if_clone_detach(&lagg_cloner);
 		lagg_input_p = NULL;
 		lagg_linkstate_p = NULL;
-		mtx_destroy(&lagg_list_mtx);
 		break;
 	default:
 		return (EOPNOTSUPP);
@@ -218,7 +260,6 @@
 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_lagg, 1);
 
-#if __FreeBSD_version >= 800000
 /*
  * This routine is run via an vlan
  * config EVENT
@@ -228,16 +269,17 @@
 {
         struct lagg_softc       *sc = ifp->if_softc;
         struct lagg_port        *lp;
+        struct rm_priotracker   tracker;
 
         if (ifp->if_softc !=  arg)   /* Not our event */
                 return;
 
-        LAGG_RLOCK(sc);
+        LAGG_RLOCK(sc, &tracker);
         if (!SLIST_EMPTY(&sc->sc_ports)) {
                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
                         EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
         }
-        LAGG_RUNLOCK(sc);
+        LAGG_RUNLOCK(sc, &tracker);
 }
 
 /*
@@ -249,18 +291,18 @@
 {
         struct lagg_softc       *sc = ifp->if_softc;
         struct lagg_port        *lp;
+        struct rm_priotracker   tracker;
 
         if (ifp->if_softc !=  arg)   /* Not our event */
                 return;
 
-        LAGG_RLOCK(sc);
+        LAGG_RLOCK(sc, &tracker);
         if (!SLIST_EMPTY(&sc->sc_ports)) {
                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
                         EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
         }
-        LAGG_RUNLOCK(sc);
+        LAGG_RUNLOCK(sc, &tracker);
 }
-#endif
 
 static int
 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
@@ -267,10 +309,8 @@
 {
 	struct lagg_softc *sc;
 	struct ifnet *ifp;
-	int i, error = 0;
 	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
-	struct sysctl_oid *oid;
-	char num[14];			/* sufficient for 32 bits */
+	int i;
 
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -279,17 +319,15 @@
 		return (ENOSPC);
 	}
 
-	sysctl_ctx_init(&sc->ctx);
-	snprintf(num, sizeof(num), "%u", unit);
-	sc->use_flowid = def_use_flowid;
-	oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
-		OID_AUTO, num, CTLFLAG_RD, NULL, "");
-	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
-		"Use flow id for load sharing");
-	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count,
-		"Total number of ports");
+	sc->sc_ipackets = counter_u64_alloc(M_WAITOK);
+	sc->sc_opackets = counter_u64_alloc(M_WAITOK);
+	sc->sc_ibytes = counter_u64_alloc(M_WAITOK);
+	sc->sc_obytes = counter_u64_alloc(M_WAITOK);
+
+	if (V_def_use_flowid)
+		sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+	sc->flowid_shift = V_def_flowid_shift;
+
 	/* Hash all layers by default */
 	sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
 
@@ -297,18 +335,21 @@
 	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
 		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
 			sc->sc_proto = lagg_protos[i].ti_proto;
-			if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
-				if_free_type(ifp, IFT_ETHER);
-				free(sc, M_DEVBUF);
-				return (error);
-			}
+			lagg_protos[i].ti_attach(sc);
 			break;
 		}
 	}
 	LAGG_LOCK_INIT(sc);
+	LAGG_CALLOUT_LOCK_INIT(sc);
 	SLIST_INIT(&sc->sc_ports);
 	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
 
+	/*
+	 * This uses the callout lock rather than the rmlock; one can't
+	 * hold said rmlock during SWI.
+	 */
+	callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
+
 	/* Initialise pseudo media types */
 	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
 	    lagg_media_status);
@@ -315,8 +356,7 @@
 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
 
-	if_initname(ifp, ifc->ifc_name, unit);
-	ifp->if_type = IFT_ETHER;
+	if_initname(ifp, laggname, unit);
 	ifp->if_softc = sc;
 	ifp->if_transmit = lagg_transmit;
 	ifp->if_qflush = lagg_qflush;
@@ -323,25 +363,26 @@
 	ifp->if_init = lagg_init;
 	ifp->if_ioctl = lagg_ioctl;
 	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+	ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
 
 	/*
-	 * Attach as an ordinary ethernet device, childs will be attached
+	 * Attach as an ordinary ethernet device, children will be attached
 	 * as special device IFT_IEEE8023ADLAG.
 	 */
 	ether_ifattach(ifp, eaddr);
 
-#if __FreeBSD_version >= 800000
 	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
 	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 		lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
-#endif
 
 	/* Insert into the global list of laggs */
-	mtx_lock(&lagg_list_mtx);
-	SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
-	mtx_unlock(&lagg_list_mtx);
+	LAGG_LIST_LOCK();
+	SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
+	LAGG_LIST_UNLOCK();
 
+	callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
+
 	return (0);
 }
 
@@ -356,10 +397,8 @@
 	lagg_stop(sc);
 	ifp->if_flags &= ~IFF_UP;
 
-#if __FreeBSD_version >= 800000
 	EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
 	EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
-#endif
 
 	/* Shutdown and remove lagg ports */
 	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
@@ -367,20 +406,29 @@
 	/* Unhook the aggregation protocol */
 	if (sc->sc_detach != NULL)
 		(*sc->sc_detach)(sc);
+	else
+		LAGG_WUNLOCK(sc);
 
-	LAGG_WUNLOCK(sc);
-
-	sysctl_ctx_free(&sc->ctx);
 	ifmedia_removeall(&sc->sc_media);
 	ether_ifdetach(ifp);
-	if_free_type(ifp, IFT_ETHER);
+	if_free(ifp);
 
-	mtx_lock(&lagg_list_mtx);
-	SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
-	mtx_unlock(&lagg_list_mtx);
+	/* This grabs sc_callout_mtx, serialising it correctly */
+	callout_drain(&sc->sc_callout);
 
+	/* At this point it's drained; we can free this */
+	counter_u64_free(sc->sc_ipackets);
+	counter_u64_free(sc->sc_opackets);
+	counter_u64_free(sc->sc_ibytes);
+	counter_u64_free(sc->sc_obytes);
+
+	LAGG_LIST_LOCK();
+	SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
+	LAGG_LIST_UNLOCK();
+
 	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
 	LAGG_LOCK_DESTROY(sc);
+	LAGG_CALLOUT_LOCK_DESTROY(sc);
 	free(sc, M_DEVBUF);
 }
 
@@ -388,15 +436,28 @@
 lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
 {
 	struct ifnet *ifp = sc->sc_ifp;
+	struct lagg_port lp;
 
 	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
 		return;
 
+	LAGG_WLOCK_ASSERT(sc);
+	/*
+	 * Set the link layer address on the lagg interface.
+	 * sc_lladdr() notifies the MAC change to
+	 * the aggregation protocol.  iflladdr_event handler which
+	 * may trigger gratuitous ARPs for INET will be handled in
+	 * a taskqueue.
+	 */
 	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
-	/* Let the protocol know the MAC has changed */
 	if (sc->sc_lladdr != NULL)
 		(*sc->sc_lladdr)(sc);
-	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+
+	bzero(&lp, sizeof(lp));
+	lp.lp_ifp = sc->sc_ifp;
+	lp.lp_softc = sc;
+
+	lagg_port_lladdr(&lp, lladdr);
 }
 
 static void
@@ -405,14 +466,18 @@
 	struct lagg_port *lp;
 	int cap = ~0, ena = ~0;
 	u_long hwa = ~0UL;
+	struct ifnet_hw_tsomax hw_tsomax;
 
 	LAGG_WLOCK_ASSERT(sc);
 
+	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
+
 	/* Get capabilities from the lagg ports */
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		cap &= lp->lp_ifp->if_capabilities;
 		ena &= lp->lp_ifp->if_capenable;
 		hwa &= lp->lp_ifp->if_hwassist;
+		if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
 	}
 	cap = (cap == ~0 ? 0 : cap);
 	ena = (ena == ~0 ? 0 : ena);
@@ -420,7 +485,8 @@
 
 	if (sc->sc_ifp->if_capabilities != cap ||
 	    sc->sc_ifp->if_capenable != ena ||
-	    sc->sc_ifp->if_hwassist != hwa) {
+	    sc->sc_ifp->if_hwassist != hwa ||
+	    if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
 		sc->sc_ifp->if_capabilities = cap;
 		sc->sc_ifp->if_capenable = ena;
 		sc->sc_ifp->if_hwassist = hwa;
@@ -439,11 +505,13 @@
 	struct ifnet *ifp = lp->lp_ifp;
 	struct lagg_llq *llq;
 	int pending = 0;
+	int primary;
 
 	LAGG_WLOCK_ASSERT(sc);
 
-	if (lp->lp_detaching ||
-	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+	primary = (sc->sc_primary->lp_ifp == ifp) ? 1 : 0;
+	if (primary == 0 && (lp->lp_detaching ||
+	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0))
 		return;
 
 	/* Check to make sure its not already queued to be changed */
@@ -462,6 +530,7 @@
 
 	/* Update the lladdr even if pending, it may have changed */
 	llq->llq_ifp = ifp;
+	llq->llq_primary = primary;
 	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
 
 	if (!pending)
@@ -494,12 +563,20 @@
 	for (llq = head; llq != NULL; llq = head) {
 		ifp = llq->llq_ifp;
 
-		/* Set the link layer address */
-		error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
-		if (error)
-			printf("%s: setlladdr failed on %s\n", __func__,
-			    ifp->if_xname);
-
+		CURVNET_SET(ifp->if_vnet);
+		if (llq->llq_primary == 0) {
+			/*
+			 * Set the link layer address on the laggport interface.
+			 * if_setlladdr() triggers gratuitous ARPs for INET.
+			 */
+			error = if_setlladdr(ifp, llq->llq_lladdr,
+			    ETHER_ADDR_LEN);
+			if (error)
+				printf("%s: setlladdr failed on %s\n", __func__,
+				    ifp->if_xname);
+		} else
+			EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+		CURVNET_RESTORE();
 		head = SLIST_NEXT(llq, llq_entries);
 		free(llq, M_DEVBUF);
 	}
@@ -509,7 +586,7 @@
 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
 {
 	struct lagg_softc *sc_ptr;
-	struct lagg_port *lp;
+	struct lagg_port *lp, *tlp;
 	int error = 0;
 
 	LAGG_WLOCK_ASSERT(sc);
@@ -545,10 +622,10 @@
 		return (ENOMEM);
 
 	/* Check if port is a stacked lagg */
-	mtx_lock(&lagg_list_mtx);
-	SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
+	LAGG_LIST_LOCK();
+	SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
 		if (ifp == sc_ptr->sc_ifp) {
-			mtx_unlock(&lagg_list_mtx);
+			LAGG_LIST_UNLOCK();
 			free(lp, M_DEVBUF);
 			return (EINVAL);
 			/* XXX disable stacking for the moment, its untested */
@@ -556,7 +633,7 @@
 			lp->lp_flags |= LAGG_PORT_STACK;
 			if (lagg_port_checkstacking(sc_ptr) >=
 			    LAGG_MAX_STACKING) {
-				mtx_unlock(&lagg_list_mtx);
+				LAGG_LIST_UNLOCK();
 				free(lp, M_DEVBUF);
 				return (E2BIG);
 			}
@@ -563,7 +640,7 @@
 #endif
 		}
 	}
-	mtx_unlock(&lagg_list_mtx);
+	LAGG_LIST_UNLOCK();
 
 	/* Change the interface type */
 	lp->lp_iftype = ifp->if_type;
@@ -588,8 +665,23 @@
 		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
 	}
 
-	/* Insert into the list of ports */
-	SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
+	/*
+	 * Insert into the list of ports.
+	 * Keep ports sorted by if_index. It is handy, when configuration
+	 * is predictable and `ifconfig laggN create ...` command
+	 * will lead to the same result each time.
+	 */
+	SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
+		if (tlp->lp_ifp->if_index < ifp->if_index && (
+		    SLIST_NEXT(tlp, lp_entries) == NULL ||
+		    SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index >
+		    ifp->if_index))
+			break;
+	}
+	if (tlp != NULL)
+		SLIST_INSERT_AFTER(tlp, lp, lp_entries);
+	else
+		SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
 	sc->sc_count++;
 
 	/* Update lagg capabilities */
@@ -714,6 +806,7 @@
 	struct lagg_softc *sc;
 	struct lagg_port *lp = NULL;
 	int error = 0;
+	struct rm_priotracker tracker;
 
 	/* Should be checked by the caller */
 	if (ifp->if_type != IFT_IEEE8023ADLAG ||
@@ -728,15 +821,15 @@
 			break;
 		}
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_RUNLOCK(sc);
+			LAGG_RUNLOCK(sc, &tracker);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		break;
 
 	case SIOCSIFCAP:
@@ -766,7 +859,7 @@
 	return (error);
 
 fallback:
-	if (lp->lp_ioctl != NULL)
+	if (lp != NULL && lp->lp_ioctl != NULL)
 		return ((*lp->lp_ioctl)(ifp, cmd, data));
 
 	return (EINVAL);
@@ -777,7 +870,7 @@
  */
 static int
 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
-	struct sockaddr *dst, struct route *ro)
+	const struct sockaddr *dst, struct route *ro)
 {
 	struct lagg_port *lp = ifp->if_lagg;
 
@@ -789,7 +882,7 @@
 
 	/* drop any other frames */
 	m_freem(m);
-	return (EBUSY);
+	return (ENETDOWN);
 }
 
 static void
@@ -897,29 +990,32 @@
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_reqall *ra = (struct lagg_reqall *)data;
+	struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
 	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
 	struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct lagg_port *lp;
+	const struct lagg_proto *proto = NULL;
 	struct ifnet *tpif;
 	struct thread *td = curthread;
 	char *buf, *outbuf;
 	int count, buflen, len, error = 0;
+	struct rm_priotracker tracker;
 
 	bzero(&rpbuf, sizeof(rpbuf));
 
 	switch (cmd) {
 	case SIOCGLAGG:
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		count = 0;
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 			count++;
 		buflen = count * sizeof(struct lagg_reqport);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 
 		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		ra->ra_proto = sc->sc_proto;
 		if (sc->sc_req != NULL)
 			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
@@ -937,7 +1033,7 @@
 			buf += sizeof(rpbuf);
 			len -= sizeof(rpbuf);
 		}
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		ra->ra_ports = count;
 		ra->ra_size = count * sizeof(rpbuf);
 		error = copyout(outbuf, ra->ra_port, ra->ra_size);
@@ -947,15 +1043,27 @@
 		error = priv_check(td, PRIV_NET_LAGG);
 		if (error)
 			break;
-		if (ra->ra_proto >= LAGG_PROTO_MAX) {
+		for (proto = lagg_protos; proto->ti_proto != LAGG_PROTO_NONE;
+		    proto++) {
+			if (proto->ti_proto == ra->ra_proto) {
+				if (sc->sc_ifflags & IFF_DEBUG)
+					printf("%s: using proto %u\n",
+					    sc->sc_ifname, proto->ti_proto);
+				break;
+			}
+		}
+		if (proto->ti_proto >= LAGG_PROTO_MAX) {
 			error = EPROTONOSUPPORT;
 			break;
 		}
+		/* Set to LAGG_PROTO_NONE during the attach. */
 		LAGG_WLOCK(sc);
 		if (sc->sc_proto != LAGG_PROTO_NONE) {
-			/* Reset protocol first in case detach unlocks */
+			int (*sc_detach)(struct lagg_softc *sc);
+
+			/* Reset protocol and pointers */
 			sc->sc_proto = LAGG_PROTO_NONE;
-			error = sc->sc_detach(sc);
+			sc_detach = sc->sc_detach;
 			sc->sc_detach = NULL;
 			sc->sc_start = NULL;
 			sc->sc_input = NULL;
@@ -967,30 +1075,138 @@
 			sc->sc_lladdr = NULL;
 			sc->sc_req = NULL;
 			sc->sc_portreq = NULL;
-		} else if (sc->sc_input != NULL) {
-			/* Still detaching */
-			error = EBUSY;
+
+			if (sc_detach != NULL)
+				sc_detach(sc);
+			else
+				LAGG_WUNLOCK(sc);
+		} else
+			LAGG_WUNLOCK(sc);
+		if (proto->ti_proto != LAGG_PROTO_NONE)
+			proto->ti_attach(sc);
+		LAGG_WLOCK(sc);
+		sc->sc_proto = proto->ti_proto;
+		LAGG_WUNLOCK(sc);
+		break;
+	case SIOCGLAGGOPTS:
+		ro->ro_opts = sc->sc_opts;
+		if (sc->sc_proto == LAGG_PROTO_LACP) {
+			struct lacp_softc *lsc;
+
+			lsc = (struct lacp_softc *)sc->sc_psc;
+			if (lsc->lsc_debug.lsc_tx_test != 0)
+				ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
+			if (lsc->lsc_debug.lsc_rx_test != 0)
+				ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
+			if (lsc->lsc_strict_mode != 0)
+				ro->ro_opts |= LAGG_OPT_LACP_STRICT;
+			if (lsc->lsc_fast_timeout != 0)
+				ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT;
+
+			ro->ro_active = sc->sc_active;
+		} else {
+			ro->ro_active = 0;
+			SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+				ro->ro_active += LAGG_PORTACTIVE(lp);
 		}
-		if (error != 0) {
-			LAGG_WUNLOCK(sc);
+		ro->ro_flapping = sc->sc_flapping;
+		ro->ro_flowid_shift = sc->flowid_shift;
+		break;
+	case SIOCSLAGGOPTS:
+		error = priv_check(td, PRIV_NET_LAGG);
+		if (error)
 			break;
+		if (ro->ro_opts == 0)
+			break;
+		/*
+		 * Set options.  LACP options are stored in sc->sc_psc,
+		 * not in sc_opts.
+		 */
+		int valid, lacp;
+
+		switch (ro->ro_opts) {
+		case LAGG_OPT_USE_FLOWID:
+		case -LAGG_OPT_USE_FLOWID:
+		case LAGG_OPT_FLOWIDSHIFT:
+			valid = 1;
+			lacp = 0;
+			break;
+		case LAGG_OPT_LACP_TXTEST:
+		case -LAGG_OPT_LACP_TXTEST:
+		case LAGG_OPT_LACP_RXTEST:
+		case -LAGG_OPT_LACP_RXTEST:
+		case LAGG_OPT_LACP_STRICT:
+		case -LAGG_OPT_LACP_STRICT:
+		case LAGG_OPT_LACP_TIMEOUT:
+		case -LAGG_OPT_LACP_TIMEOUT:
+			valid = lacp = 1;
+			break;
+		default:
+			valid = lacp = 0;
+			break;
 		}
-		for (int i = 0; i < (sizeof(lagg_protos) /
-		    sizeof(lagg_protos[0])); i++) {
-			if (lagg_protos[i].ti_proto == ra->ra_proto) {
-				if (sc->sc_ifflags & IFF_DEBUG)
-					printf("%s: using proto %u\n",
-					    sc->sc_ifname,
-					    lagg_protos[i].ti_proto);
-				sc->sc_proto = lagg_protos[i].ti_proto;
-				if (sc->sc_proto != LAGG_PROTO_NONE)
-					error = lagg_protos[i].ti_attach(sc);
-				LAGG_WUNLOCK(sc);
-				return (error);
+
+		LAGG_WLOCK(sc);
+		if (valid == 0 ||
+		    (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
+			/* Invalid combination of options specified. */
+			error = EINVAL;
+			LAGG_WUNLOCK(sc);
+			break;	/* Return from SIOCSLAGGOPTS. */ 
+		}
+		/*
+		 * Store new options into sc->sc_opts except for
+		 * FLOWIDSHIFT and LACP options.
+		 */
+		if (lacp == 0) {
+			if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
+				sc->flowid_shift = ro->ro_flowid_shift;
+			else if (ro->ro_opts > 0)
+				sc->sc_opts |= ro->ro_opts;
+			else
+				sc->sc_opts &= ~ro->ro_opts;
+		} else {
+			struct lacp_softc *lsc;
+			struct lacp_port *lp;
+
+			lsc = (struct lacp_softc *)sc->sc_psc;
+
+			switch (ro->ro_opts) {
+			case LAGG_OPT_LACP_TXTEST:
+				lsc->lsc_debug.lsc_tx_test = 1;
+				break;
+			case -LAGG_OPT_LACP_TXTEST:
+				lsc->lsc_debug.lsc_tx_test = 0;
+				break;
+			case LAGG_OPT_LACP_RXTEST:
+				lsc->lsc_debug.lsc_rx_test = 1;
+				break;
+			case -LAGG_OPT_LACP_RXTEST:
+				lsc->lsc_debug.lsc_rx_test = 0;
+				break;
+			case LAGG_OPT_LACP_STRICT:
+				lsc->lsc_strict_mode = 1;
+				break;
+			case -LAGG_OPT_LACP_STRICT:
+				lsc->lsc_strict_mode = 0;
+				break;
+			case LAGG_OPT_LACP_TIMEOUT:
+				LACP_LOCK(lsc);
+        			LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+                        		lp->lp_state |= LACP_STATE_TIMEOUT;
+				LACP_UNLOCK(lsc);
+				lsc->lsc_fast_timeout = 1;
+				break;
+			case -LAGG_OPT_LACP_TIMEOUT:
+				LACP_LOCK(lsc);
+        			LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+                        		lp->lp_state &= ~LACP_STATE_TIMEOUT;
+				LACP_UNLOCK(lsc);
+				lsc->lsc_fast_timeout = 0;
+				break;
 			}
 		}
 		LAGG_WUNLOCK(sc);
-		error = EPROTONOSUPPORT;
 		break;
 	case SIOCGLAGGFLAGS:
 		rf->rf_flags = sc->sc_flags;
@@ -1015,16 +1231,16 @@
 			break;
 		}
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 		    lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_RUNLOCK(sc);
+			LAGG_RUNLOCK(sc, &tracker);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		break;
 	case SIOCSLAGGPORT:
 		error = priv_check(td, PRIV_NET_LAGG);
@@ -1035,6 +1251,26 @@
 			error = EINVAL;
 			break;
 		}
+#ifdef INET6
+		/*
+		 * A laggport interface should not have inet6 address
+		 * because two interfaces with a valid link-local
+		 * scope zone must not be merged in any form.  This
+		 * restriction is needed to prevent violation of
+		 * link-local scope zone.  Attempts to add a laggport
+		 * interface which has inet6 addresses triggers
+		 * removal of all inet6 addresses on the member
+		 * interface.
+		 */
+		if (in6ifa_llaonifp(tpif)) {
+			in6_ifdetach(tpif);
+				if_printf(sc->sc_ifp,
+				    "IPv6 addresses on %s have been removed "
+				    "before adding it as a member to prevent "
+				    "IPv6 address scope violation.\n",
+				    tpif->if_xname);
+		}
+#endif
 		LAGG_WLOCK(sc);
 		error = lagg_port_create(sc, tpif);
 		LAGG_WUNLOCK(sc);
@@ -1133,39 +1369,39 @@
 	struct ifnet *ifp = lp->lp_ifp;
 	struct ifnet *scifp = sc->sc_ifp;
 	struct lagg_mc *mc;
-	struct ifmultiaddr *ifma, *rifma = NULL;
-	struct sockaddr_dl sdl;
+	struct ifmultiaddr *ifma;
 	int error;
 
 	LAGG_WLOCK_ASSERT(sc);
 
-	bzero((char *)&sdl, sizeof(sdl));
-	sdl.sdl_len = sizeof(sdl);
-	sdl.sdl_family = AF_LINK;
-	sdl.sdl_type = IFT_ETHER;
-	sdl.sdl_alen = ETHER_ADDR_LEN;
-	sdl.sdl_index = ifp->if_index;
-
 	if (set) {
+		IF_ADDR_WLOCK(scifp);
 		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family != AF_LINK)
 				continue;
-			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
-			    LLADDR(&sdl), ETHER_ADDR_LEN);
-
-			error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
-			if (error)
-				return (error);
 			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
-			if (mc == NULL)
+			if (mc == NULL) {
+				IF_ADDR_WUNLOCK(scifp);
 				return (ENOMEM);
-			mc->mc_ifma = rifma;
+			}
+			bcopy(ifma->ifma_addr, &mc->mc_addr,
+			    ifma->ifma_addr->sa_len);
+			mc->mc_addr.sdl_index = ifp->if_index;
+			mc->mc_ifma = NULL;
 			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
 		}
+		IF_ADDR_WUNLOCK(scifp);
+		SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
+			error = if_addmulti(ifp,
+			    (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
+			if (error)
+				return (error);
+		}
 	} else {
 		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
-			if_delmulti_ifma(mc->mc_ifma);
+			if (mc->mc_ifma && !lp->lp_detaching)
+				if_delmulti_ifma(mc->mc_ifma);
 			free(mc, M_DEVBUF);
 		}
 	}
@@ -1230,14 +1466,15 @@
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	int error, len, mcast;
+	struct rm_priotracker tracker;
 
 	len = m->m_pkthdr.len;
 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	/* We need a Tx algorithm and at least one port */
 	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		m_freem(m);
 		ifp->if_oerrors++;
 		return (ENXIO);
@@ -1246,12 +1483,12 @@
 	ETHER_BPF_MTAP(ifp, m);
 
 	error = (*sc->sc_start)(sc, m);
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 
 	if (error == 0) {
-		ifp->if_opackets++;
+		counter_u64_add(sc->sc_opackets, 1);
+		counter_u64_add(sc->sc_obytes, len);
 		ifp->if_omcasts += mcast;
-		ifp->if_obytes += len;
 	} else
 		ifp->if_oerrors++;
 
@@ -1272,12 +1509,13 @@
 	struct lagg_port *lp = ifp->if_lagg;
 	struct lagg_softc *sc = lp->lp_softc;
 	struct ifnet *scifp = sc->sc_ifp;
+	struct rm_priotracker tracker;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
 	    sc->sc_proto == LAGG_PROTO_NONE) {
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		m_freem(m);
 		return (NULL);
 	}
@@ -1284,11 +1522,11 @@
 
 	ETHER_BPF_MTAP(scifp, m);
 
-	m = (*sc->sc_input)(sc, lp, m);
+	m = (lp->lp_detaching == 0) ? (*sc->sc_input)(sc, lp, m) : NULL;
 
 	if (m != NULL) {
-		scifp->if_ipackets++;
-		scifp->if_ibytes += m->m_pkthdr.len;
+		counter_u64_add(sc->sc_ipackets, 1);
+		counter_u64_add(sc->sc_ibytes, m->m_pkthdr.len);
 
 		if (scifp->if_flags & IFF_MONITOR) {
 			m_freem(m);
@@ -1296,7 +1534,7 @@
 		}
 	}
 
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 	return (m);
 }
 
@@ -1317,16 +1555,17 @@
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_port *lp;
+	struct rm_priotracker tracker;
 
 	imr->ifm_status = IFM_AVALID;
 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (LAGG_PORTACTIVE(lp))
 			imr->ifm_status |= IFM_ACTIVE;
 	}
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 }
 
 static void
@@ -1558,18 +1797,16 @@
 /*
  * Simple round robin aggregation
  */
-
-static int
+static void
 lagg_rr_attach(struct lagg_softc *sc)
 {
 	sc->sc_detach = lagg_rr_detach;
 	sc->sc_start = lagg_rr_start;
 	sc->sc_input = lagg_rr_input;
+	sc->sc_detach = NULL;
 	sc->sc_port_create = NULL;
 	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
 	sc->sc_seq = 0;
-
-	return (0);
 }
 
 static int
@@ -1596,7 +1833,7 @@
 	 */
 	if ((lp = lagg_link_active(sc, lp)) == NULL) {
 		m_freem(m);
-		return (ENOENT);
+		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
@@ -1617,8 +1854,7 @@
 /*
  * Active failover
  */
-
-static int
+static void
 lagg_fail_attach(struct lagg_softc *sc)
 {
 	sc->sc_detach = lagg_fail_detach;
@@ -1626,8 +1862,7 @@
 	sc->sc_input = lagg_fail_input;
 	sc->sc_port_create = NULL;
 	sc->sc_port_destroy = NULL;
-
-	return (0);
+	sc->sc_detach = NULL;
 }
 
 static int
@@ -1644,7 +1879,7 @@
 	/* Use the master port if active or the next available port */
 	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
 		m_freem(m);
-		return (ENOENT);
+		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
@@ -1657,7 +1892,7 @@
 	struct ifnet *ifp = sc->sc_ifp;
 	struct lagg_port *tmp_tp;
 
-	if (lp == sc->sc_primary || lagg_failover_rx_all) {
+	if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
 		m->m_pkthdr.rcvif = ifp;
 		return (m);
 	}
@@ -1681,16 +1916,13 @@
 /*
  * Loadbalancing
  */
-
-static int
+static void
 lagg_lb_attach(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
 	struct lagg_lb *lb;
 
-	if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
-	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
-		return (ENOMEM);
+	lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
 
 	sc->sc_detach = lagg_lb_detach;
 	sc->sc_start = lagg_lb_start;
@@ -1704,8 +1936,6 @@
 
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lagg_lb_port_create(lp);
-
-	return (0);
 }
 
 static int
@@ -1712,6 +1942,7 @@
 lagg_lb_detach(struct lagg_softc *sc)
 {
 	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+	LAGG_WUNLOCK(sc);
 	if (lb != NULL)
 		free(lb, M_DEVBUF);
 	return (0);
@@ -1760,8 +1991,9 @@
 	struct lagg_port *lp = NULL;
 	uint32_t p = 0;
 
-	if (sc->use_flowid && (m->m_flags & M_FLOWID))
-		p = m->m_pkthdr.flowid;
+	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+	    M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+		p = m->m_pkthdr.flowid >> sc->flowid_shift;
 	else
 		p = lagg_hashmbuf(sc, m, lb->lb_key);
 	p %= sc->sc_count;
@@ -1773,7 +2005,7 @@
 	 */
 	if ((lp = lagg_link_active(sc, lp)) == NULL) {
 		m_freem(m);
-		return (ENOENT);
+		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
@@ -1794,12 +2026,10 @@
 /*
  * 802.3ad LACP
  */
-
-static int
+static void
 lagg_lacp_attach(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
-	int error;
 
 	sc->sc_detach = lagg_lacp_detach;
 	sc->sc_port_create = lacp_port_create;
@@ -1813,14 +2043,10 @@
 	sc->sc_req = lacp_req;
 	sc->sc_portreq = lacp_portreq;
 
-	error = lacp_attach(sc);
-	if (error)
-		return (error);
+	lacp_attach(sc);
 
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lacp_port_create(lp);
-
-	return (error);
 }
 
 static int
@@ -1827,17 +2053,18 @@
 lagg_lacp_detach(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
-	int error;
+	void *psc;
 
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lacp_port_destroy(lp);
 
-	/* unlocking is safe here */
+	psc = sc->sc_psc;
+	sc->sc_psc = NULL;
 	LAGG_WUNLOCK(sc);
-	error = lacp_detach(sc);
-	LAGG_WLOCK(sc);
 
-	return (error);
+	lacp_detach(psc);
+
+	return (0);
 }
 
 static void
@@ -1862,7 +2089,7 @@
 	lp = lacp_select_tx_port(sc, m);
 	if (lp == NULL) {
 		m_freem(m);
-		return (EBUSY);
+		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
@@ -1898,3 +2125,17 @@
 	m->m_pkthdr.rcvif = ifp;
 	return (m);
 }
+
+static void
+lagg_callout(void *arg)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)arg;
+	struct ifnet *ifp = sc->sc_ifp;
+
+	ifp->if_ipackets = counter_u64_fetch(sc->sc_ipackets);
+	ifp->if_opackets = counter_u64_fetch(sc->sc_opackets);
+	ifp->if_ibytes = counter_u64_fetch(sc->sc_ibytes);
+	ifp->if_obytes = counter_u64_fetch(sc->sc_obytes);
+
+	callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
+}

Modified: trunk/sys/net/if_lagg.h
===================================================================
--- trunk/sys/net/if_lagg.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_lagg.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -16,14 +16,12 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
- * $FreeBSD: stable/9/sys/net/if_lagg.h 236049 2012-05-26 07:39:52Z thompsa $
+ * $FreeBSD: stable/10/sys/net/if_lagg.h 287808 2015-09-15 05:19:10Z hiren $
  */
 
 #ifndef _NET_LAGG_H
 #define _NET_LAGG_H
 
-#include <sys/sysctl.h>
-
 /*
  * Global definitions
  */
@@ -50,17 +48,19 @@
 				  "\05DISTRIBUTING\06DISABLED"
 
 /* Supported lagg PROTOs */
-#define	LAGG_PROTO_NONE		0	/* no lagg protocol defined */
-#define	LAGG_PROTO_ROUNDROBIN	1	/* simple round robin */
-#define	LAGG_PROTO_FAILOVER	2	/* active failover */
-#define	LAGG_PROTO_LOADBALANCE	3	/* loadbalance */
-#define	LAGG_PROTO_LACP		4	/* 802.3ad lacp */
-#define	LAGG_PROTO_ETHERCHANNEL	5	/* Cisco FEC */
-#define	LAGG_PROTO_MAX		6
+typedef enum {
+	LAGG_PROTO_NONE = 0,	/* no lagg protocol defined */
+	LAGG_PROTO_ROUNDROBIN,	/* simple round robin */
+	LAGG_PROTO_FAILOVER,	/* active failover */
+	LAGG_PROTO_LOADBALANCE,	/* loadbalance */
+	LAGG_PROTO_LACP,	/* 802.3ad lacp */
+	LAGG_PROTO_ETHERCHANNEL,/* Cisco FEC */
+	LAGG_PROTO_MAX,
+} lagg_proto;
 
 struct lagg_protos {
 	const char		*lpr_name;
-	int			lpr_proto;
+	lagg_proto		lpr_proto;
 };
 
 #define	LAGG_PROTO_DEFAULT	LAGG_PROTO_FAILOVER
@@ -137,7 +137,35 @@
 #define	SIOCGLAGGFLAGS		_IOWR('i', 145, struct lagg_reqflags)
 #define	SIOCSLAGGHASH		 _IOW('i', 146, struct lagg_reqflags)
 
+struct lagg_reqopts {
+	char			ro_ifname[IFNAMSIZ];	/* name of the lagg */
+
+	int			ro_opts;		/* Option bitmap */
+#define	LAGG_OPT_NONE			0x00
+#define	LAGG_OPT_USE_FLOWID		0x01		/* use M_FLOWID */
+/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
+#define	LAGG_OPT_FLOWIDSHIFT		0x02		/* Set flowid */
+#define	LAGG_OPT_FLOWIDSHIFT_MASK	0x1f		/* flowid is uint32_t */
+#define	LAGG_OPT_LACP_STRICT		0x10		/* LACP strict mode */
+#define	LAGG_OPT_LACP_TXTEST		0x20		/* LACP debug: txtest */
+#define	LAGG_OPT_LACP_RXTEST		0x40		/* LACP debug: rxtest */
+#define	LAGG_OPT_LACP_TIMEOUT		0x80		/* LACP timeout */
+	u_int			ro_count;		/* number of ports */
+	u_int			ro_active;		/* active port count */
+	u_int			ro_flapping;		/* number of flapping */
+	int			ro_flowid_shift;	/* shift the flowid */
+};
+
+#define	SIOCGLAGGOPTS		_IOWR('i', 152, struct lagg_reqopts)
+#define	SIOCSLAGGOPTS		 _IOW('i', 153, struct lagg_reqopts)
+
+#define	LAGG_OPT_BITS		"\020\001USE_FLOWID\005LACP_STRICT" \
+				"\006LACP_TXTEST\007LACP_RXTEST"
+
 #ifdef _KERNEL
+
+#include <sys/counter.h>
+
 /*
  * Internal kernel part
  */
@@ -174,6 +202,7 @@
 };
 
 struct lagg_mc {
+	struct sockaddr_dl	mc_addr;
 	struct ifmultiaddr      *mc_ifma;
 	SLIST_ENTRY(lagg_mc)	mc_entries;
 };
@@ -182,14 +211,19 @@
 struct lagg_llq {
 	struct ifnet		*llq_ifp;
 	uint8_t			llq_lladdr[ETHER_ADDR_LEN];
+	uint8_t			llq_primary;
 	SLIST_ENTRY(lagg_llq)	llq_entries;
 };
 
 struct lagg_softc {
 	struct ifnet			*sc_ifp;	/* virtual interface */
-	struct rwlock			sc_mtx;
+	struct rmlock			sc_mtx;
+	struct mtx			sc_call_mtx;
 	int				sc_proto;	/* lagg protocol */
 	u_int				sc_count;	/* number of ports */
+	u_int				sc_active;	/* active port count */
+	u_int				sc_flapping;	/* number of flapping
+							 * events */
 	struct lagg_port		*sc_primary;	/* primary port */
 	struct ifmedia			sc_media;	/* media config */
 	caddr_t				sc_psc;		/* protocol data */
@@ -196,6 +230,11 @@
 	uint32_t			sc_seq;		/* sequence counter */
 	uint32_t			sc_flags;
 
+	counter_u64_t			sc_ipackets;
+	counter_u64_t			sc_opackets;
+	counter_u64_t			sc_ibytes;
+	counter_u64_t			sc_obytes;
+
 	SLIST_HEAD(__tplhd, lagg_port)	sc_ports;	/* list of interfaces */
 	SLIST_ENTRY(lagg_softc)	sc_entries;
 
@@ -216,12 +255,11 @@
 	void	(*sc_lladdr)(struct lagg_softc *);
 	void	(*sc_req)(struct lagg_softc *, caddr_t);
 	void	(*sc_portreq)(struct lagg_port *, caddr_t);
-#if __FreeBSD_version >= 800000
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
-#endif
-	struct sysctl_ctx_list		ctx;		/* sysctl variables */
-	int				use_flowid;	/* use M_FLOWID */
+	struct callout			sc_callout;
+	u_int				sc_opts;
+	int				flowid_shift;	/* set flowid shift*/
 };
 
 struct lagg_port {
@@ -241,21 +279,26 @@
 
 	/* Redirected callbacks */
 	int	(*lp_ioctl)(struct ifnet *, u_long, caddr_t);
-	int	(*lp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
-		     struct route *);
+	int	(*lp_output)(struct ifnet *, struct mbuf *,
+		     const struct sockaddr *, struct route *);
 
 	SLIST_ENTRY(lagg_port)		lp_entries;
 };
 
-#define	LAGG_LOCK_INIT(_sc)	rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
-#define	LAGG_LOCK_DESTROY(_sc)	rw_destroy(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK(_sc)		rw_rlock(&(_sc)->sc_mtx)
-#define	LAGG_WLOCK(_sc)		rw_wlock(&(_sc)->sc_mtx)
-#define	LAGG_RUNLOCK(_sc)	rw_runlock(&(_sc)->sc_mtx)
-#define	LAGG_WUNLOCK(_sc)	rw_wunlock(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
-#define	LAGG_WLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define	LAGG_LOCK_INIT(_sc)	rm_init(&(_sc)->sc_mtx, "if_lagg rmlock")
+#define	LAGG_LOCK_DESTROY(_sc)	rm_destroy(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK(_sc, _p)	rm_rlock(&(_sc)->sc_mtx, (_p))
+#define	LAGG_WLOCK(_sc)		rm_wlock(&(_sc)->sc_mtx)
+#define	LAGG_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->sc_mtx, (_p))
+#define	LAGG_WUNLOCK(_sc)	rm_wunlock(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define	LAGG_WLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_WLOCKED)
 
+#define	LAGG_CALLOUT_LOCK_INIT(_sc)					\
+	    mtx_init(&(_sc)->sc_call_mtx, "if_lagg callout mutex", NULL,\
+	    MTX_DEF)
+#define	LAGG_CALLOUT_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->sc_call_mtx)
+
 extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
 extern void	(*lagg_linkstate_p)(struct ifnet *, int );
 
@@ -262,6 +305,8 @@
 int		lagg_enqueue(struct ifnet *, struct mbuf *);
 uint32_t	lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
 
+SYSCTL_DECL(_net_link_lagg);
+
 #endif /* _KERNEL */
 
 #endif /* _NET_LAGG_H */

Modified: trunk/sys/net/if_llatbl.c
===================================================================
--- trunk/sys/net/if_llatbl.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_llatbl.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,7 +26,7 @@
  * SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_llatbl.c 248852 2013-03-28 20:48:40Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_llatbl.c 294500 2016-01-21 14:04:02Z bz $");
 
 #include "opt_ddb.h"
 #include "opt_inet.h"
@@ -63,14 +63,10 @@
 
 MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
 
-static VNET_DEFINE(SLIST_HEAD(, lltable), lltables);
+static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) =
+    SLIST_HEAD_INITIALIZER(lltables);
 #define	V_lltables	VNET(lltables)
 
-extern void arprequest(struct ifnet *, struct in_addr *, struct in_addr *,
-	u_char *);
-
-static void vnet_lltable_init(void);
-
 struct rwlock lltable_rwlock;
 RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock");
 
@@ -277,10 +273,9 @@
 	u_int laflags = 0, flags = 0;
 	int error = 0;
 
-	if (dl == NULL || dl->sdl_family != AF_LINK) {
-		log(LOG_INFO, "%s: invalid dl\n", __func__);
-		return EINVAL;
-	}
+	KASSERT(dl != NULL && dl->sdl_family == AF_LINK,
+	    ("%s: invalid dl\n", __func__));
+
 	ifp = ifnet_byindex(dl->sdl_index);
 	if (ifp == NULL) {
 		log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
@@ -290,28 +285,8 @@
 
 	switch (rtm->rtm_type) {
 	case RTM_ADD:
-		if (rtm->rtm_flags & RTF_ANNOUNCE) {
+		if (rtm->rtm_flags & RTF_ANNOUNCE)
 			flags |= LLE_PUB;
-#ifdef INET
-			if (dst->sa_family == AF_INET &&
-			    ((struct sockaddr_inarp *)dst)->sin_other != 0) {
-				struct rtentry *rt;
-				((struct sockaddr_inarp *)dst)->sin_other = 0;
-				rt = rtalloc1(dst, 0, 0);
-				if (rt == NULL || !(rt->rt_flags & RTF_HOST)) {
-					log(LOG_INFO, "%s: RTM_ADD publish "
-					    "(proxy only) is invalid\n",
-					    __func__);
-					if (rt)
-						RTFREE_LOCKED(rt);
-					return EINVAL;
-				}
-				RTFREE_LOCKED(rt);
-
-				flags |= LLE_PROXY;
-			}
-#endif
-		}
 		flags |= LLE_CREATE;
 		break;
 
@@ -350,7 +325,7 @@
 			 * LLE_DELETED flag, and reset the expiration timer
 			 */
 			bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
-			lle->la_flags |= (flags & (LLE_PUB | LLE_PROXY));
+			lle->la_flags |= (flags & LLE_PUB);
 			lle->la_flags |= LLE_VALID;
 			lle->la_flags &= ~LLE_DELETED;
 #ifdef INET6
@@ -372,15 +347,12 @@
 			laflags = lle->la_flags;
 			LLE_WUNLOCK(lle);
 #ifdef INET
-			/*  gratuitous ARP */
-			if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) {
+			/* gratuitous ARP */
+			if ((laflags & LLE_PUB) && dst->sa_family == AF_INET)
 				arprequest(ifp,
 				    &((struct sockaddr_in *)dst)->sin_addr,
 				    &((struct sockaddr_in *)dst)->sin_addr,
-				    ((laflags & LLE_PROXY) ?
-					(u_char *)IF_LLADDR(ifp) :
-					(u_char *)LLADDR(dl)));
-			}
+				    (u_char *)LLADDR(dl));
 #endif
 		} else {
 			if (flags & LLE_EXCLUSIVE)
@@ -395,15 +367,6 @@
 	return (error);
 }
 
-static void
-vnet_lltable_init()
-{
-
-	SLIST_INIT(&V_lltables);
-}
-VNET_SYSINIT(vnet_lltable_init, SI_SUB_PSEUDO, SI_ORDER_FIRST,
-    vnet_lltable_init, NULL);
-
 #ifdef DDB
 struct llentry_sa {
 	struct llentry		base;

Modified: trunk/sys/net/if_llatbl.h
===================================================================
--- trunk/sys/net/if_llatbl.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_llatbl.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,13 +26,11 @@
  * SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_llatbl.h 240313 2012-09-10 12:25:57Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_llatbl.h 254963 2013-08-27 16:45:00Z alfred $");
 
 #ifndef	_NET_IF_LLATBL_H_
 #define	_NET_IF_LLATBL_H_
 
-#include "opt_ofed.h"
-
 #include <sys/_rwlock.h>
 #include <netinet/in.h>
 
@@ -76,9 +74,7 @@
 	union {
 		uint64_t	mac_aligned;
 		uint16_t	mac16[3];
-#ifdef OFED
 		uint8_t		mac8[20];	/* IB needs 20 bytes. */
-#endif
 	} ll_addr;
 
 	/* XXX af-private? */
@@ -173,7 +169,6 @@
 #define	LLE_STATIC	0x0002	/* entry is static */
 #define	LLE_IFADDR	0x0004	/* entry is interface addr */
 #define	LLE_VALID	0x0008	/* ll_addr is valid */
-#define	LLE_PROXY	0x0010	/* proxy entry ??? */
 #define	LLE_PUB		0x0020	/* publish entry ??? */
 #define	LLE_LINKED	0x0040	/* linked to lookup structure */
 #define	LLE_EXCLUSIVE	0x2000	/* return lle xlocked  */
@@ -206,4 +201,14 @@
 }
 
 int		lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
+
+#include <sys/eventhandler.h>
+enum {
+	LLENTRY_RESOLVED,
+	LLENTRY_TIMEDOUT,
+	LLENTRY_DELETED,
+	LLENTRY_EXPIRED,
+};
+typedef void (*lle_event_fn)(void *, struct llentry *, int);
+EVENTHANDLER_DECLARE(lle_event, lle_event_fn);
 #endif  /* _NET_IF_LLATBL_H_ */

Modified: trunk/sys/net/if_llc.h
===================================================================
--- trunk/sys/net/if_llc.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_llc.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -30,7 +30,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_llc.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_llc.h 164804 2006-12-01 17:50:11Z imp $
+ * $FreeBSD: stable/10/sys/net/if_llc.h 164804 2006-12-01 17:50:11Z imp $
  */
 
 #ifndef _NET_IF_LLC_H_

Modified: trunk/sys/net/if_loop.c
===================================================================
--- trunk/sys/net/if_loop.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_loop.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_loop.c	8.2 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/if_loop.c 238876 2012-07-28 23:11:09Z bz $
+ * $FreeBSD: stable/10/sys/net/if_loop.c 285605 2015-07-15 16:57:40Z pkelsey $
  */
 
 /*
@@ -102,7 +102,7 @@
 int		loioctl(struct ifnet *, u_long, caddr_t);
 static void	lortrequest(int, struct rtentry *, struct rt_addrinfo *);
 int		looutput(struct ifnet *ifp, struct mbuf *m,
-		    struct sockaddr *dst, struct route *ro);
+		    const struct sockaddr *dst, struct route *ro);
 static int	lo_clone_create(struct if_clone *, int, caddr_t);
 static void	lo_clone_destroy(struct ifnet *);
 
@@ -109,13 +109,12 @@
 VNET_DEFINE(struct ifnet *, loif);	/* Used externally */
 
 #ifdef VIMAGE
-static VNET_DEFINE(struct ifc_simple_data, lo_cloner_data);
-static VNET_DEFINE(struct if_clone, lo_cloner);
-#define	V_lo_cloner_data	VNET(lo_cloner_data)
+static VNET_DEFINE(struct if_clone *, lo_cloner);
 #define	V_lo_cloner		VNET(lo_cloner)
 #endif
 
-IFC_SIMPLE_DECLARE(lo, 1);
+static struct if_clone *lo_cloner;
+static const char loname[] = "lo";
 
 static void
 lo_clone_destroy(struct ifnet *ifp)
@@ -140,7 +139,7 @@
 	if (ifp == NULL)
 		return (ENOSPC);
 
-	if_initname(ifp, ifc->ifc_name, unit);
+	if_initname(ifp, loname, unit);
 	ifp->if_mtu = LOMTU;
 	ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
 	ifp->if_ioctl = loioctl;
@@ -162,12 +161,12 @@
 {
 
 #ifdef VIMAGE
+	lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+	    1);
 	V_lo_cloner = lo_cloner;
-	V_lo_cloner_data = lo_cloner_data;
-	V_lo_cloner.ifc_data = &V_lo_cloner_data;
-	if_clone_attach(&V_lo_cloner);
 #else
-	if_clone_attach(&lo_cloner);
+	lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+	    1);
 #endif
 }
 VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
@@ -178,7 +177,7 @@
 vnet_loif_uninit(const void *unused __unused)
 {
 
-	if_clone_detach(&V_lo_cloner);
+	if_clone_detach(V_lo_cloner);
 	V_loif = NULL;
 }
 VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
@@ -212,7 +211,7 @@
 DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
 
 int
-looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	u_int32_t af;
@@ -243,13 +242,13 @@
 	ifp->if_obytes += m->m_pkthdr.len;
 
 	/* BPF writes need to be handled specially. */
-	if (dst->sa_family == AF_UNSPEC) {
+	if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
 		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af;
-	}
+	else
+		af = dst->sa_family;
 
 #if 1	/* XXX */
-	switch (dst->sa_family) {
+	switch (af) {
 	case AF_INET:
 		if (ifp->if_capenable & IFCAP_RXCSUM) {
 			m->m_pkthdr.csum_data = 0xffff;
@@ -278,12 +277,12 @@
 	case AF_APPLETALK:
 		break;
 	default:
-		printf("looutput: af=%d unexpected\n", dst->sa_family);
+		printf("looutput: af=%d unexpected\n", af);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 #endif
-	return (if_simloop(ifp, m, dst->sa_family, 0));
+	return (if_simloop(ifp, m, af, 0));
 }
 
 /*
@@ -396,7 +395,7 @@
 {
 
 	RT_LOCK_ASSERT(rt);
-	rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+	rt->rt_mtu = rt->rt_ifp->if_mtu;
 }
 
 /*

Added: trunk/sys/net/if_me.c
===================================================================
--- trunk/sys/net/if_me.c	                        (rev 0)
+++ trunk/sys/net/if_me.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,675 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_me.c 290347 2015-11-04 00:21:02Z hrs $");
+
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mbuf.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/vnet.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_encap.h>
+
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
+#define	MEMTU			1500
+static const char mename[] = "me";
+static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
+static VNET_DEFINE(struct mtx, me_mtx);
+#define	V_me_mtx	VNET(me_mtx)
+/* Minimal forwarding header RFC 2004 */
+struct mobhdr {
+	uint8_t		mob_proto;	/* protocol */
+	uint8_t		mob_flags;	/* flags */
+#define	MOB_FLAGS_SP	0x80		/* source present */
+	uint16_t	mob_csum;	/* header checksum */
+	struct in_addr	mob_dst;	/* original destination address */
+	struct in_addr	mob_src;	/* original source addr (optional) */
+} __packed;
+
+struct me_softc {
+	struct ifnet		*me_ifp;
+	LIST_ENTRY(me_softc)	me_list;
+	struct rmlock		me_lock;
+	u_int			me_fibnum;
+	const struct encaptab	*me_ecookie;
+	struct in_addr		me_src;
+	struct in_addr		me_dst;
+};
+#define	ME2IFP(sc)		((sc)->me_ifp)
+#define	ME_READY(sc)		((sc)->me_src.s_addr != 0)
+#define	ME_LOCK_INIT(sc)	rm_init(&(sc)->me_lock, "me softc")
+#define	ME_LOCK_DESTROY(sc)	rm_destroy(&(sc)->me_lock)
+#define	ME_RLOCK_TRACKER	struct rm_priotracker me_tracker
+#define	ME_RLOCK(sc)		rm_rlock(&(sc)->me_lock, &me_tracker)
+#define	ME_RUNLOCK(sc)		rm_runlock(&(sc)->me_lock, &me_tracker)
+#define	ME_RLOCK_ASSERT(sc)	rm_assert(&(sc)->me_lock, RA_RLOCKED)
+#define	ME_WLOCK(sc)		rm_wlock(&(sc)->me_lock)
+#define	ME_WUNLOCK(sc)		rm_wunlock(&(sc)->me_lock)
+#define	ME_WLOCK_ASSERT(sc)	rm_assert(&(sc)->me_lock, RA_WLOCKED)
+
+#define	ME_LIST_LOCK_INIT(x)	mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
+#define	ME_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_me_mtx)
+#define	ME_LIST_LOCK(x)		mtx_lock(&V_me_mtx)
+#define	ME_LIST_UNLOCK(x)	mtx_unlock(&V_me_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
+#define	V_me_softc_list	VNET(me_softc_list)
+static struct sx me_ioctl_sx;
+SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
+
+static int	me_clone_create(struct if_clone *, int, caddr_t);
+static void	me_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, me_cloner);
+#define	V_me_cloner	VNET(me_cloner)
+
+static void	me_qflush(struct ifnet *);
+static int	me_transmit(struct ifnet *, struct mbuf *);
+static int	me_ioctl(struct ifnet *, u_long, caddr_t);
+static int	me_output(struct ifnet *, struct mbuf *,
+		    const struct sockaddr *, struct route *);
+static int	me_input(struct mbuf **, int *, int);
+
+static int	me_set_tunnel(struct ifnet *, struct sockaddr_in *,
+    struct sockaddr_in *);
+static void	me_delete_tunnel(struct ifnet *);
+
+SYSCTL_DECL(_net_link);
+static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
+    "Minimal Encapsulation for IP (RFC 2004)");
+#ifndef MAX_ME_NEST
+#define MAX_ME_NEST 1
+#endif
+
+static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
+#define	V_max_me_nesting	VNET(max_me_nesting)
+SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+    &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
+
+extern struct domain inetdomain;
+static void me_input10(struct mbuf *, int);
+static const struct protosw in_mobile_protosw = {
+	.pr_type =		SOCK_RAW,
+	.pr_domain =		&inetdomain,
+	.pr_protocol =		IPPROTO_MOBILE,
+	.pr_flags =		PR_ATOMIC|PR_ADDR,
+	.pr_input =		me_input10,
+	.pr_output =		(pr_output_t *)rip_output,
+	.pr_ctlinput =		rip_ctlinput,
+	.pr_ctloutput =		rip_ctloutput,
+	.pr_usrreqs =		&rip_usrreqs
+};
+
+static void
+vnet_me_init(const void *unused __unused)
+{
+	LIST_INIT(&V_me_softc_list);
+	ME_LIST_LOCK_INIT();
+	V_me_cloner = if_clone_simple(mename, me_clone_create,
+	    me_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_me_init, NULL);
+
+static void
+vnet_me_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_me_cloner);
+	ME_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_me_uninit, NULL);
+
+static int
+me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct me_softc *sc;
+
+	sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
+	sc->me_fibnum = curthread->td_proc->p_fibnum;
+	ME2IFP(sc) = if_alloc(IFT_TUNNEL);
+	ME_LOCK_INIT(sc);
+	ME2IFP(sc)->if_softc = sc;
+	if_initname(ME2IFP(sc), mename, unit);
+
+	ME2IFP(sc)->if_mtu = MEMTU - sizeof(struct mobhdr);
+	ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
+	ME2IFP(sc)->if_output = me_output;
+	ME2IFP(sc)->if_ioctl = me_ioctl;
+	ME2IFP(sc)->if_transmit = me_transmit;
+	ME2IFP(sc)->if_qflush = me_qflush;
+	ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+	ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
+	if_attach(ME2IFP(sc));
+	bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
+	ME_LIST_LOCK();
+	LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
+	ME_LIST_UNLOCK();
+	return (0);
+}
+
+static void
+me_clone_destroy(struct ifnet *ifp)
+{
+	struct me_softc *sc;
+
+	sx_xlock(&me_ioctl_sx);
+	sc = ifp->if_softc;
+	me_delete_tunnel(ifp);
+	ME_LIST_LOCK();
+	LIST_REMOVE(sc, me_list);
+	ME_LIST_UNLOCK();
+	bpfdetach(ifp);
+	if_detach(ifp);
+	ifp->if_softc = NULL;
+	sx_xunlock(&me_ioctl_sx);
+
+	if_free(ifp);
+	ME_LOCK_DESTROY(sc);
+	free(sc, M_IFME);
+}
+
+static int
+me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	ME_RLOCK_TRACKER;
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct sockaddr_in *src, *dst;
+	struct me_softc *sc;
+	int error;
+
+	switch (cmd) {
+	case SIOCSIFMTU:
+		if (ifr->ifr_mtu < 576)
+			return (EINVAL);
+		ifp->if_mtu = ifr->ifr_mtu - sizeof(struct mobhdr);
+		return (0);
+	case SIOCSIFADDR:
+		ifp->if_flags |= IFF_UP;
+	case SIOCSIFFLAGS:
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		return (0);
+	}
+	sx_xlock(&me_ioctl_sx);
+	sc = ifp->if_softc;
+	if (sc == NULL) {
+		error = ENXIO;
+		goto end;
+	}
+	error = 0;
+	switch (cmd) {
+	case SIOCSIFPHYADDR:
+		src = (struct sockaddr_in *)
+			&(((struct in_aliasreq *)data)->ifra_addr);
+		dst = (struct sockaddr_in *)
+			&(((struct in_aliasreq *)data)->ifra_dstaddr);
+		if (src->sin_family != dst->sin_family ||
+		    src->sin_family != AF_INET ||
+		    src->sin_len != dst->sin_len ||
+		    src->sin_len != sizeof(struct sockaddr_in)) {
+			error = EINVAL;
+			break;
+		}
+		if (src->sin_addr.s_addr == INADDR_ANY ||
+		    dst->sin_addr.s_addr == INADDR_ANY) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		error = me_set_tunnel(ifp, src, dst);
+		break;
+	case SIOCDIFPHYADDR:
+		me_delete_tunnel(ifp);
+		break;
+	case SIOCGIFPSRCADDR:
+	case SIOCGIFPDSTADDR:
+		ME_RLOCK(sc);
+		if (!ME_READY(sc)) {
+			error = EADDRNOTAVAIL;
+			ME_RUNLOCK(sc);
+			break;
+		}
+		src = (struct sockaddr_in *)&ifr->ifr_addr;
+		memset(src, 0, sizeof(*src));
+		src->sin_family = AF_INET;
+		src->sin_len = sizeof(*src);
+		switch (cmd) {
+		case SIOCGIFPSRCADDR:
+			src->sin_addr = sc->me_src;
+			break;
+		case SIOCGIFPDSTADDR:
+			src->sin_addr = sc->me_dst;
+			break;
+		}
+		ME_RUNLOCK(sc);
+		error = prison_if(curthread->td_ucred, sintosa(src));
+		if (error != 0)
+			memset(src, 0, sizeof(*src));
+		break;
+	case SIOCGTUNFIB:
+		ifr->ifr_fib = sc->me_fibnum;
+		break;
+	case SIOCSTUNFIB:
+		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+			break;
+		if (ifr->ifr_fib >= rt_numfibs)
+			error = EINVAL;
+		else
+			sc->me_fibnum = ifr->ifr_fib;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+end:
+	sx_xunlock(&me_ioctl_sx);
+	return (error);
+}
+
+static int
+me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+	ME_RLOCK_TRACKER;
+	struct me_softc *sc;
+	struct ip *ip;
+	int ret;
+
+	sc = (struct me_softc *)arg;
+	if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
+		return (0);
+
+	M_ASSERTPKTHDR(m);
+
+	if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) -
+	    sizeof(struct in_addr))
+		return (0);
+
+	ret = 0;
+	ME_RLOCK(sc);
+	if (ME_READY(sc)) {
+		ip = mtod(m, struct ip *);
+		if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
+		    sc->me_dst.s_addr == ip->ip_src.s_addr)
+			ret = 32 * 2;
+	}
+	ME_RUNLOCK(sc);
+	return (ret);
+}
+
+static int
+me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
+    struct sockaddr_in *dst)
+{
+	struct me_softc *sc, *tsc;
+
+	sx_assert(&me_ioctl_sx, SA_XLOCKED);
+	ME_LIST_LOCK();
+	sc = ifp->if_softc;
+	LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
+		if (tsc == sc || !ME_READY(tsc))
+			continue;
+		if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
+		    tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
+			ME_LIST_UNLOCK();
+			return (EADDRNOTAVAIL);
+		}
+	}
+	ME_LIST_UNLOCK();
+
+	ME_WLOCK(sc);
+	sc->me_dst = dst->sin_addr;
+	sc->me_src = src->sin_addr;
+	ME_WUNLOCK(sc);
+
+	if (sc->me_ecookie == NULL)
+		sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE,
+		    me_encapcheck, &in_mobile_protosw, sc);
+	if (sc->me_ecookie != NULL) {
+		ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		if_link_state_change(ifp, LINK_STATE_UP);
+	}
+	return (0);
+}
+
+static void
+me_delete_tunnel(struct ifnet *ifp)
+{
+	struct me_softc *sc = ifp->if_softc;
+
+	sx_assert(&me_ioctl_sx, SA_XLOCKED);
+	if (sc->me_ecookie != NULL)
+		encap_detach(sc->me_ecookie);
+	sc->me_ecookie = NULL;
+	ME_WLOCK(sc);
+	sc->me_src.s_addr = 0;
+	sc->me_dst.s_addr = 0;
+	ME_WUNLOCK(sc);
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+	if_link_state_change(ifp, LINK_STATE_DOWN);
+}
+
+static uint16_t
+me_in_cksum(uint16_t *p, int nwords)
+{
+	uint32_t sum = 0;
+
+	while (nwords-- > 0)
+		sum += *p++;
+	sum = (sum >> 16) + (sum & 0xffff);
+	sum += (sum >> 16);
+	return (~sum);
+}
+
+static void
+me_input10(struct mbuf *m, int off)
+{
+	int proto;
+
+	proto = (mtod(m, struct ip *))->ip_p;
+	me_input(&m, &off, proto);
+}
+
+int
+me_input(struct mbuf **mp, int *offp, int proto)
+{
+	struct me_softc *sc;
+	struct mobhdr *mh;
+	struct ifnet *ifp;
+	struct mbuf *m;
+	struct ip *ip;
+	int hlen;
+
+	m = *mp;
+	sc = encap_getarg(m);
+	KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+	ifp = ME2IFP(sc);
+	/* checks for short packets */
+	hlen = sizeof(struct mobhdr);
+	if (m->m_pkthdr.len < sizeof(struct ip) + hlen)
+		hlen -= sizeof(struct in_addr);
+	if (m->m_len < sizeof(struct ip) + hlen)
+		m = m_pullup(m, sizeof(struct ip) + hlen);
+	if (m == NULL)
+		goto drop;
+	mh = (struct mobhdr *)mtodo(m, sizeof(struct ip));
+	/* check for wrong flags */
+	if (mh->mob_flags & (~MOB_FLAGS_SP)) {
+		m_freem(m);
+		goto drop;
+	}
+	if (mh->mob_flags) {
+	       if (hlen != sizeof(struct mobhdr)) {
+			m_freem(m);
+			goto drop;
+	       }
+	} else
+		hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+	/* check mobile header checksum */
+	if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) {
+		m_freem(m);
+		goto drop;
+	}
+#ifdef MAC
+	mac_ifnet_create_mbuf(ifp, m);
+#endif
+	ip = mtod(m, struct ip *);
+	ip->ip_dst = mh->mob_dst;
+	ip->ip_p = mh->mob_proto;
+	ip->ip_sum = 0;
+	ip->ip_len = htons(m->m_pkthdr.len - hlen);
+	if (mh->mob_flags)
+		ip->ip_src = mh->mob_src;
+	memmove(mtodo(m, hlen), ip, sizeof(struct ip));
+	m_adj(m, hlen);
+	m_clrprotoflags(m);
+	m->m_pkthdr.rcvif = ifp;
+	m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
+	M_SETFIB(m, ifp->if_fib);
+	hlen = AF_INET;
+	BPF_MTAP2(ifp, &hlen, sizeof(hlen), m);
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+	if ((ifp->if_flags & IFF_MONITOR) != 0)
+		m_freem(m);
+	else
+		netisr_dispatch(NETISR_IP, m);
+	return (IPPROTO_DONE);
+drop:
+	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+	return (IPPROTO_DONE);
+}
+
+#define	MTAG_ME	1414491977
+static int
+me_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+	struct m_tag *mtag;
+	int count;
+
+	count = 1;
+	mtag = NULL;
+	while ((mtag = m_tag_locate(m, MTAG_ME, 0, mtag)) != NULL) {
+		if (*(struct ifnet **)(mtag + 1) == ifp) {
+			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+			return (EIO);
+		}
+		count++;
+	}
+	if (count > V_max_me_nesting) {
+		log(LOG_NOTICE,
+		    "%s: if_output recursively called too many times(%d)\n",
+		    ifp->if_xname, count);
+		return (EIO);
+	}
+	mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT);
+	if (mtag == NULL)
+		return (ENOMEM);
+	*(struct ifnet **)(mtag + 1) = ifp;
+	m_tag_prepend(m, mtag);
+	return (0);
+}
+
+static int
+me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+   struct route *ro)
+{
+	uint32_t af;
+	int error;
+
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m);
+	if (error != 0)
+		goto drop;
+#endif
+	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+	    (ifp->if_flags & IFF_UP) == 0) {
+		error = ENETDOWN;
+		goto drop;
+	}
+
+	error = me_check_nesting(ifp, m);
+	if (error != 0)
+		goto drop;
+
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	if (dst->sa_family == AF_UNSPEC)
+		bcopy(dst->sa_data, &af, sizeof(af));
+	else
+		af = dst->sa_family;
+	if (af != AF_INET) {
+		error = EAFNOSUPPORT;
+		goto drop;
+	}
+	BPF_MTAP2(ifp, &af, sizeof(af), m);
+	return (ifp->if_transmit(ifp, m));
+drop:
+	m_freem(m);
+	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+	return (error);
+}
+
+static int
+me_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	ME_RLOCK_TRACKER;
+	struct mobhdr mh;
+	struct me_softc *sc;
+	struct ip *ip;
+	int error, hlen, plen;
+
+	sc = ifp->if_softc;
+	if (sc == NULL) {
+		error = ENETDOWN;
+		m_freem(m);
+		goto drop;
+	}
+	if (m->m_len < sizeof(struct ip))
+		m = m_pullup(m, sizeof(struct ip));
+	if (m == NULL) {
+		error = ENOBUFS;
+		goto drop;
+	}
+	ip = mtod(m, struct ip *);
+	/* Fragmented datagrams shouldn't be encapsulated */
+	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+		error = EINVAL;
+		m_freem(m);
+		goto drop;
+	}
+	mh.mob_proto = ip->ip_p;
+	mh.mob_src = ip->ip_src;
+	mh.mob_dst = ip->ip_dst;
+	ME_RLOCK(sc);
+	if (!ME_READY(sc)) {
+		ME_RUNLOCK(sc);
+		error = ENETDOWN;
+		m_freem(m);
+		goto drop;
+	}
+	if (in_hosteq(sc->me_src, ip->ip_src)) {
+		hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+		mh.mob_flags = 0;
+	} else {
+		hlen = sizeof(struct mobhdr);
+		mh.mob_flags = MOB_FLAGS_SP;
+	}
+	plen = m->m_pkthdr.len;
+	ip->ip_src = sc->me_src;
+	ip->ip_dst = sc->me_dst;
+	M_SETFIB(m, sc->me_fibnum);
+	ME_RUNLOCK(sc);
+	M_PREPEND(m, hlen, M_NOWAIT);
+	if (m == NULL) {
+		error = ENOBUFS;
+		goto drop;
+	}
+	if (m->m_len < sizeof(struct ip) + hlen)
+		m = m_pullup(m, sizeof(struct ip) + hlen);
+	if (m == NULL) {
+		error = ENOBUFS;
+		goto drop;
+	}
+	memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip));
+	ip = mtod(m, struct ip *);
+	ip->ip_len = htons(m->m_pkthdr.len);
+	ip->ip_p = IPPROTO_MOBILE;
+	ip->ip_sum = 0;
+	mh.mob_csum = 0;
+	mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t));
+	bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen);
+	error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+drop:
+	if (error)
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+	else {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+	}
+	return (error);
+}
+
+static void
+me_qflush(struct ifnet *ifp __unused)
+{
+
+}
+
+static int
+memodevent(module_t mod, int type, void *data)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+	case MOD_UNLOAD:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t me_mod = {
+	"if_me",
+	memodevent,
+	0
+};
+
+DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_me, 1);


Property changes on: trunk/sys/net/if_me.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/if_media.c
===================================================================
--- trunk/sys/net/if_media.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_media.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*	$NetBSD: if_media.c,v 1.1 1997/03/17 02:55:15 thorpej Exp $	*/
-/* $FreeBSD: stable/9/sys/net/if_media.c 218909 2011-02-21 09:01:34Z brucec $ */
+/* $FreeBSD: stable/10/sys/net/if_media.c 313387 2017-02-07 15:12:27Z rstone $ */
 
 /*-
  * Copyright (c) 1997
@@ -69,6 +69,7 @@
     int flags, int mask);
 
 #ifdef IFMEDIA_DEBUG
+#include <net/if_var.h>
 int	ifmedia_debug = 0;
 SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug,
 	    0, "if_media debugging msgs");
@@ -105,6 +106,7 @@
 		LIST_REMOVE(entry, ifm_list);
 		free(entry, M_IFADDR);
 	}
+	ifm->ifm_cur = NULL;
 }
 
 /*
@@ -194,6 +196,21 @@
 }
 
 /*
+ * Given a media word, return one suitable for an application
+ * using the original encoding.
+ */
+static int
+compat_media(int media)
+{
+
+	if (IFM_TYPE(media) == IFM_ETHER && IFM_SUBTYPE(media) > IFM_OTHER) {
+		media &= ~(IFM_ETH_XTYPE|IFM_TMASK);
+		media |= IFM_OTHER;
+	}
+	return (media);
+}
+
+/*
  * Device-independent media ioctl support function.
  */
 int
@@ -272,6 +289,7 @@
 	 * Get list of available media and current media on interface.
 	 */
 	case  SIOCGIFMEDIA: 
+	case  SIOCGIFXMEDIA: 
 	{
 		struct ifmedia_entry *ep;
 		int *kptr, count;
@@ -279,8 +297,13 @@
 
 		kptr = NULL;		/* XXX gcc */
 
-		ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
-		    ifm->ifm_cur->ifm_media : IFM_NONE;
+		if (cmd == SIOCGIFMEDIA) {
+			ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+			    compat_media(ifm->ifm_cur->ifm_media) : IFM_NONE;
+		} else {
+			ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+			    ifm->ifm_cur->ifm_media : IFM_NONE;
+		}
 		ifmr->ifm_mask = ifm->ifm_mask;
 		ifmr->ifm_status = 0;
 		(*ifm->ifm_status)(ifp, ifmr);
@@ -399,8 +422,7 @@
 	int i;
 
 	for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) {
-		if ((mword & (IFM_NMASK|IFM_TMASK)) ==
-		    ifmedia_baudrate_descriptions[i].ifmb_word)
+		if (IFM_TYPE_MATCH(mword, ifmedia_baudrate_descriptions[i].ifmb_word))
 			return (ifmedia_baudrate_descriptions[i].ifmb_baudrate);
 	}
 
@@ -506,7 +528,7 @@
 		printf("<unknown type>\n");
 		return;
 	}
-	printf(desc->ifmt_string);
+	printf("%s", desc->ifmt_string);
 
 	/* Any mode. */
 	for (desc = ttos->modes; desc && desc->ifmt_string != NULL; desc++)

Modified: trunk/sys/net/if_media.h
===================================================================
--- trunk/sys/net/if_media.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_media.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
 /* $MidnightBSD$ */
 /*	$NetBSD: if_media.h,v 1.3 1997/03/26 01:19:27 thorpej Exp $	*/
-/* $FreeBSD: stable/9/sys/net/if_media.h 235764 2012-05-22 00:00:17Z jhb $ */
+/* $FreeBSD: stable/10/sys/net/if_media.h 283758 2015-05-29 23:02:12Z erj $ */
 
 /*-
  * Copyright (c) 1997
@@ -116,7 +116,7 @@
  *	----	-------
  *	0-4	Media variant
  *	5-7	Media type
- *	8-15	Type specific options
+ *	8-15	Type specific options (includes added variant bits on Ethernet)
  *	16-18	Mode (for multi-mode devices)
  *	19	RFU
  *	20-27	Shared (global) options
@@ -125,8 +125,18 @@
 
 /*
  * Ethernet
+ * In order to use more than 31 subtypes, Ethernet uses some of the option
+ * bits as part of the subtype field.  See the options section below for
+ * relevant definitions
  */
 #define	IFM_ETHER	0x00000020
+#define	IFM_ETHER_SUBTYPE(x) (((x) & IFM_TMASK) | \
+	(((x) & (IFM_ETH_XTYPE >> IFM_ETH_XSHIFT)) << IFM_ETH_XSHIFT))
+#define	IFM_X(x) IFM_ETHER_SUBTYPE(x)	/* internal shorthand */
+#define	IFM_ETHER_SUBTYPE_SET(x) (IFM_ETHER_SUBTYPE(x) | IFM_ETHER)
+#define	IFM_ETHER_SUBTYPE_GET(x) ((x) & (IFM_TMASK|IFM_ETH_XTYPE))
+#define	IFM_ETHER_IS_EXTENDED(x)	((x) & IFM_ETH_XTYPE)
+
 #define	IFM_10_T	3		/* 10BaseT - RJ45 */
 #define	IFM_10_2	4		/* 10Base2 - Thinnet */
 #define	IFM_10_5	5		/* 10Base5 - AUI */
@@ -154,12 +164,49 @@
 #define	IFM_40G_CR4	27		/* 40GBase-CR4 */
 #define	IFM_40G_SR4	28		/* 40GBase-SR4 */
 #define	IFM_40G_LR4	29		/* 40GBase-LR4 */
+#define	IFM_1000_KX	30		/* 1000Base-KX backplane */
+#define	IFM_OTHER	31		/* Other: one of the following */
 
-/* note 31 is the max! */
+/* following types are not visible to old binaries using only IFM_TMASK */
+#define	IFM_10G_KX4	IFM_X(32)	/* 10GBase-KX4 backplane */
+#define	IFM_10G_KR	IFM_X(33)	/* 10GBase-KR backplane */
+#define	IFM_10G_CR1	IFM_X(34)	/* 10GBase-CR1 Twinax splitter */
+#define	IFM_20G_KR2	IFM_X(35)	/* 20GBase-KR2 backplane */
+#define	IFM_2500_KX	IFM_X(36)	/* 2500Base-KX backplane */
+#define	IFM_2500_T	IFM_X(37)	/* 2500Base-T - RJ45 (NBaseT) */
+#define	IFM_5000_T	IFM_X(38)	/* 5000Base-T - RJ45 (NBaseT) */
+#define	IFM_50G_PCIE	IFM_X(39)	/* 50G Ethernet over PCIE */
+#define	IFM_25G_PCIE	IFM_X(40)	/* 25G Ethernet over PCIE */
+#define	IFM_1000_SGMII	IFM_X(41)	/* 1G media interface */
+#define	IFM_10G_SFI	IFM_X(42)	/* 10G media interface */
+#define	IFM_40G_XLPPI	IFM_X(43)	/* 40G media interface */
+#define	IFM_1000_CX_SGMII IFM_X(44)	/* 1000Base-CX-SGMII */
+#define	IFM_40G_KR4	IFM_X(45)	/* 40GBase-KR4 */
+#define	IFM_10G_ER	IFM_X(46)	/* 10GBase-ER */
+#define	IFM_100G_CR4	IFM_X(47)	/* 100GBase-CR4 */
+#define	IFM_100G_SR4	IFM_X(48)	/* 100GBase-SR4 */
+#define	IFM_100G_KR4	IFM_X(49)	/* 100GBase-KR4 */
+#define	IFM_100G_LR4	IFM_X(50)	/* 100GBase-LR4 */
+#define	IFM_56G_R4	IFM_X(51)	/* 56GBase-R4 */
+#define	IFM_100_T	IFM_X(52)	/* 100BaseT - RJ45 */
+#define	IFM_25G_CR	IFM_X(53)	/* 25GBase-CR */
+#define	IFM_25G_KR	IFM_X(54)	/* 25GBase-KR */
+#define	IFM_25G_SR	IFM_X(55)	/* 25GBase-SR */
+#define	IFM_50G_CR2	IFM_X(56)	/* 50GBase-CR2 */
+#define	IFM_50G_KR2	IFM_X(57)	/* 50GBase-KR2 */
 
+/*
+ * Please update ieee8023ad_lacp.c:lacp_compose_key()
+ * after adding new Ethernet media types.
+ */
+/* Note IFM_X(511) is the max! */
+
+/* Ethernet option values; includes bits used for extended variant field */
 #define	IFM_ETH_MASTER	0x00000100	/* master mode (1000baseT) */
 #define	IFM_ETH_RXPAUSE	0x00000200	/* receive PAUSE frames */
 #define	IFM_ETH_TXPAUSE	0x00000400	/* transmit PAUSE frames */
+#define	IFM_ETH_XTYPE	0x00007800	/* extended media variants */
+#define	IFM_ETH_XSHIFT	6		/* shift XTYPE next to TMASK */
 
 /*
  * Token ring
@@ -251,11 +298,6 @@
 #define	IFM_ATM_UNASSIGNED	0x00000400	/* unassigned cells */
 
 /*
- * CARP Common Address Redundancy Protocol
- */
-#define	IFM_CARP	0x000000c0
-
-/*
  * Shared media sub-types
  */
 #define	IFM_AUTO	0		/* Autoselect best media */
@@ -307,7 +349,10 @@
  * Macros to extract various bits of information from the media word.
  */
 #define	IFM_TYPE(x)		((x) & IFM_NMASK)
-#define	IFM_SUBTYPE(x)		((x) & IFM_TMASK)
+#define	IFM_SUBTYPE(x)	\
+  (IFM_TYPE(x) == IFM_ETHER ? IFM_ETHER_SUBTYPE_GET(x) : ((x) & IFM_TMASK))
+#define	IFM_TYPE_MATCH(x,y) \
+  (IFM_TYPE(x) == IFM_TYPE(y) && IFM_SUBTYPE(x) == IFM_SUBTYPE(y))
 #define	IFM_TYPE_OPTIONS(x)	((x) & IFM_OMASK)
 #define	IFM_INST(x)		(((x) & IFM_IMASK) >> IFM_ISHIFT)
 #define	IFM_OPTIONS(x)		((x) & (IFM_OMASK | IFM_GMASK))
@@ -341,7 +386,6 @@
 	{ IFM_FDDI,		"FDDI" },				\
 	{ IFM_IEEE80211,	"IEEE 802.11 Wireless Ethernet" },	\
 	{ IFM_ATM,		"ATM" },				\
-	{ IFM_CARP,		"Common Address Redundancy Protocol" }, \
 	{ 0, NULL },							\
 }
 
@@ -373,6 +417,34 @@
 	{ IFM_40G_CR4,	"40Gbase-CR4" },				\
 	{ IFM_40G_SR4,	"40Gbase-SR4" },				\
 	{ IFM_40G_LR4,	"40Gbase-LR4" },				\
+	{ IFM_1000_KX,	"1000Base-KX" },				\
+	{ IFM_OTHER,	"Other" },					\
+	{ IFM_10G_KX4,	"10GBase-KX4" },				\
+	{ IFM_10G_KR,	"10GBase-KR" },					\
+	{ IFM_10G_CR1,	"10GBase-CR1" },				\
+	{ IFM_20G_KR2,	"20GBase-KR2" },				\
+	{ IFM_2500_KX,	"2500Base-KX" },				\
+	{ IFM_2500_T,	"2500Base-T" },					\
+	{ IFM_5000_T,	"5000Base-T" },					\
+	{ IFM_50G_PCIE,	"PCIExpress-50G" },				\
+	{ IFM_25G_PCIE,	"PCIExpress-25G" },				\
+	{ IFM_1000_SGMII,	"1000Base-SGMII" },			\
+	{ IFM_10G_SFI,	"10GBase-SFI" },				\
+	{ IFM_40G_XLPPI,	"40GBase-XLPPI" },			\
+	{ IFM_1000_CX_SGMII,	"1000Base-CX-SGMII" },			\
+	{ IFM_40G_KR4,	"40GBase-KR4" },				\
+	{ IFM_10G_ER,	"10GBase-ER" },					\
+	{ IFM_100G_CR4,	"100GBase-CR4" },				\
+	{ IFM_100G_SR4,	"100GBase-SR4" },				\
+	{ IFM_100G_KR4,	"100GBase-KR4" },				\
+	{ IFM_100G_LR4, "100GBase-LR4" },				\
+	{ IFM_56G_R4,	"56GBase-R4" },					\
+	{ IFM_100_T,	"100BaseT" },					\
+	{ IFM_25G_CR,	"25GBase-CR" },					\
+	{ IFM_25G_KR,	"25GBase-KR" },					\
+	{ IFM_25G_SR,	"25GBase-SR" },					\
+	{ IFM_50G_CR2,	"50GBase-CR2" },				\
+	{ IFM_50G_KR2,	"50GBase-KR2" },				\
 	{ 0, NULL },							\
 }
 
@@ -674,6 +746,33 @@
 	{ IFM_ETHER | IFM_40G_CR4,	IF_Gbps(40ULL) },		\
 	{ IFM_ETHER | IFM_40G_SR4,	IF_Gbps(40ULL) },		\
 	{ IFM_ETHER | IFM_40G_LR4,	IF_Gbps(40ULL) },		\
+	{ IFM_ETHER | IFM_1000_KX,	IF_Mbps(1000) },		\
+	{ IFM_ETHER | IFM_10G_KX4,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_10G_KR,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_10G_CR1,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_20G_KR2,	IF_Gbps(20ULL) },		\
+	{ IFM_ETHER | IFM_2500_KX,	IF_Mbps(2500) },		\
+	{ IFM_ETHER | IFM_2500_T,	IF_Mbps(2500) },		\
+	{ IFM_ETHER | IFM_5000_T,	IF_Mbps(5000) },		\
+	{ IFM_ETHER | IFM_50G_PCIE,	IF_Gbps(50ULL) },		\
+	{ IFM_ETHER | IFM_25G_PCIE,	IF_Gbps(25ULL) },		\
+	{ IFM_ETHER | IFM_1000_SGMII,	IF_Mbps(1000) },		\
+	{ IFM_ETHER | IFM_10G_SFI,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_40G_XLPPI,	IF_Gbps(40ULL) },		\
+	{ IFM_ETHER | IFM_1000_CX_SGMII, IF_Mbps(1000) },		\
+	{ IFM_ETHER | IFM_40G_KR4,	IF_Gbps(40ULL) },		\
+	{ IFM_ETHER | IFM_10G_ER,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_100G_CR4,	IF_Gbps(100ULL) },		\
+	{ IFM_ETHER | IFM_100G_SR4,	IF_Gbps(100ULL) },		\
+	{ IFM_ETHER | IFM_100G_KR4,	IF_Gbps(100ULL) },		\
+	{ IFM_ETHER | IFM_100G_LR4,	IF_Gbps(100ULL) },		\
+	{ IFM_ETHER | IFM_56G_R4,	IF_Gbps(56ULL) },		\
+	{ IFM_ETHER | IFM_100_T,	IF_Mbps(100ULL) },		\
+	{ IFM_ETHER | IFM_25G_CR,	IF_Gbps(25ULL) },		\
+	{ IFM_ETHER | IFM_25G_KR,	IF_Gbps(25ULL) },		\
+	{ IFM_ETHER | IFM_25G_SR,	IF_Gbps(25ULL) },		\
+	{ IFM_ETHER | IFM_50G_CR2,	IF_Gbps(50ULL) },		\
+	{ IFM_ETHER | IFM_50G_KR2,	IF_Gbps(50ULL) },		\
 									\
 	{ IFM_TOKEN | IFM_TOK_STP4,	IF_Mbps(4) },			\
 	{ IFM_TOKEN | IFM_TOK_STP16,	IF_Mbps(16) },			\
@@ -728,8 +827,6 @@
 	    { "no network", "active" } },				\
 	{ IFM_ATM,		IFM_AVALID,	IFM_ACTIVE,		\
 	    { "no network", "active" } },				\
-	{ IFM_CARP,		IFM_AVALID,	IFM_ACTIVE,		\
-	    { "backup", "master" } },					\
 	{ 0,			0,		0,			\
 	    { NULL, NULL } }						\
 }

Modified: trunk/sys/net/if_mib.c
===================================================================
--- trunk/sys/net/if_mib.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_mib.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_mib.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_mib.c 227309 2011-11-07 15:43:11Z ed $
  */
 
 #include <sys/param.h>

Modified: trunk/sys/net/if_mib.h
===================================================================
--- trunk/sys/net/if_mib.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_mib.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_mib.h 154023 2006-01-04 12:57:09Z harti $
+ * $FreeBSD: stable/10/sys/net/if_mib.h 154023 2006-01-04 12:57:09Z harti $
  */
 
 #ifndef _NET_IF_MIB_H

Added: trunk/sys/net/if_pflog.h
===================================================================
--- trunk/sys/net/if_pflog.h	                        (rev 0)
+++ trunk/sys/net/if_pflog.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,67 @@
+/* $MidnightBSD$ */
+/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */
+/*
+ * Copyright 2001 Niels Provos <provos at citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFLOG_H_
+#define	_NET_IF_PFLOG_H_
+
+#define	PFLOGIFS_MAX	16
+
+#define	PFLOG_RULESET_NAME_SIZE	16
+
+struct pfloghdr {
+	u_int8_t	length;
+	sa_family_t	af;
+	u_int8_t	action;
+	u_int8_t	reason;
+	char		ifname[IFNAMSIZ];
+	char		ruleset[PFLOG_RULESET_NAME_SIZE];
+	u_int32_t	rulenr;
+	u_int32_t	subrulenr;
+	uid_t		uid;
+	pid_t		pid;
+	uid_t		rule_uid;
+	pid_t		rule_pid;
+	u_int8_t	dir;
+	u_int8_t	pad[3];
+};
+
+#define	PFLOG_HDRLEN		sizeof(struct pfloghdr)
+/* minus pad, also used as a signature */
+#define	PFLOG_REAL_HDRLEN	offsetof(struct pfloghdr, pad)
+
+#ifdef _KERNEL
+struct pf_rule;
+struct pf_ruleset;
+struct pfi_kif;
+struct pf_pdesc;
+
+#define	PFLOG_PACKET(i,a,b,c,d,e,f,g,h,di) do {		\
+	if (pflog_packet_ptr != NULL)			\
+		pflog_packet_ptr(i,a,b,c,d,e,f,g,h,di);	\
+} while (0)
+#endif /* _KERNEL */
+#endif /* _NET_IF_PFLOG_H_ */


Property changes on: trunk/sys/net/if_pflog.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/if_pfsync.h
===================================================================
--- trunk/sys/net/if_pfsync.h	                        (rev 0)
+++ trunk/sys/net/if_pfsync.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,272 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2001 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2008 David Gwynne <dlg at openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ *	$OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $
+ *	$FreeBSD: stable/10/sys/net/if_pfsync.h 254925 2013-08-26 18:16:05Z jhb $
+ */
+
+
+#ifndef _NET_IF_PFSYNC_H_
+#define	_NET_IF_PFSYNC_H_
+
+#define	PFSYNC_VERSION		5
+#define	PFSYNC_DFLTTL		255
+
+#define	PFSYNC_ACT_CLR		0	/* clear all states */
+#define	PFSYNC_ACT_INS		1	/* insert state */
+#define	PFSYNC_ACT_INS_ACK	2	/* ack of inserted state */
+#define	PFSYNC_ACT_UPD		3	/* update state */
+#define	PFSYNC_ACT_UPD_C	4	/* "compressed" update state */
+#define	PFSYNC_ACT_UPD_REQ	5	/* request "uncompressed" state */
+#define	PFSYNC_ACT_DEL		6	/* delete state */
+#define	PFSYNC_ACT_DEL_C	7	/* "compressed" delete state */
+#define	PFSYNC_ACT_INS_F	8	/* insert fragment */
+#define	PFSYNC_ACT_DEL_F	9	/* delete fragments */
+#define	PFSYNC_ACT_BUS		10	/* bulk update status */
+#define	PFSYNC_ACT_TDB		11	/* TDB replay counter update */
+#define	PFSYNC_ACT_EOF		12	/* end of frame */
+#define	PFSYNC_ACT_MAX		13
+
+/*
+ * A pfsync frame is built from a header followed by several sections which
+ * are all prefixed with their own subheaders. Frames must be terminated with
+ * an EOF subheader.
+ *
+ * | ...			|
+ * | IP header			|
+ * +============================+
+ * | pfsync_header		|
+ * +----------------------------+
+ * | pfsync_subheader		|
+ * +----------------------------+
+ * | first action fields	|
+ * | ...			|
+ * +----------------------------+
+ * | pfsync_subheader		|
+ * +----------------------------+
+ * | second action fields	|
+ * | ...			|
+ * +----------------------------+
+ * | EOF pfsync_subheader	|
+ * +----------------------------+
+ * | HMAC			|
+ * +============================+
+ */
+
+/*
+ * Frame header
+ */
+
+struct pfsync_header {
+	u_int8_t			version;
+	u_int8_t			_pad;
+	u_int16_t			len;
+	u_int8_t			pfcksum[PF_MD5_DIGEST_LENGTH];
+} __packed;
+
+/*
+ * Frame region subheader
+ */
+
+struct pfsync_subheader {
+	u_int8_t			action;
+	u_int8_t			_pad;
+	u_int16_t			count;
+} __packed;
+
+/*
+ * CLR
+ */
+
+struct pfsync_clr {
+	char				ifname[IFNAMSIZ];
+	u_int32_t			creatorid;
+} __packed;
+
+/*
+ * INS, UPD, DEL
+ */
+
+/* these use struct pfsync_state in pfvar.h */
+
+/*
+ * INS_ACK
+ */
+
+struct pfsync_ins_ack {
+	u_int64_t			id;
+	u_int32_t			creatorid;
+} __packed;
+
+/*
+ * UPD_C
+ */
+
+struct pfsync_upd_c {
+	u_int64_t			id;
+	struct pfsync_state_peer	src;
+	struct pfsync_state_peer	dst;
+	u_int32_t			creatorid;
+	u_int32_t			expire;
+	u_int8_t			timeout;
+	u_int8_t			_pad[3];
+} __packed;
+
+/*
+ * UPD_REQ
+ */
+
+struct pfsync_upd_req {
+	u_int64_t			id;
+	u_int32_t			creatorid;
+} __packed;
+
+/*
+ * DEL_C
+ */
+
+struct pfsync_del_c {
+	u_int64_t			id;
+	u_int32_t			creatorid;
+} __packed;
+
+/*
+ * INS_F, DEL_F
+ */
+
+/* not implemented (yet) */
+
+/*
+ * BUS
+ */
+
+struct pfsync_bus {
+	u_int32_t			creatorid;
+	u_int32_t			endtime;
+	u_int8_t			status;
+#define	PFSYNC_BUS_START			1
+#define	PFSYNC_BUS_END				2
+	u_int8_t			_pad[3];
+} __packed;
+
+/*
+ * TDB
+ */
+
+struct pfsync_tdb {
+	u_int32_t			spi;
+	union sockaddr_union		dst;
+	u_int32_t			rpl;
+	u_int64_t			cur_bytes;
+	u_int8_t			sproto;
+	u_int8_t			updates;
+	u_int8_t			_pad[2];
+} __packed;
+
+#define	PFSYNC_HDRLEN		sizeof(struct pfsync_header)
+
+/*
+ * Names for PFSYNC sysctl objects
+ */
+#define	PFSYNCCTL_STATS		1	/* PFSYNC stats */
+#define	PFSYNCCTL_MAXID		2
+
+struct pfsyncstats {
+	u_int64_t	pfsyncs_ipackets;	/* total input packets, IPv4 */
+	u_int64_t	pfsyncs_ipackets6;	/* total input packets, IPv6 */
+	u_int64_t	pfsyncs_badif;		/* not the right interface */
+	u_int64_t	pfsyncs_badttl;		/* TTL is not PFSYNC_DFLTTL */
+	u_int64_t	pfsyncs_hdrops;		/* packets shorter than hdr */
+	u_int64_t	pfsyncs_badver;		/* bad (incl unsupp) version */
+	u_int64_t	pfsyncs_badact;		/* bad action */
+	u_int64_t	pfsyncs_badlen;		/* data length does not match */
+	u_int64_t	pfsyncs_badauth;	/* bad authentication */
+	u_int64_t	pfsyncs_stale;		/* stale state */
+	u_int64_t	pfsyncs_badval;		/* bad values */
+	u_int64_t	pfsyncs_badstate;	/* insert/lookup failed */
+
+	u_int64_t	pfsyncs_opackets;	/* total output packets, IPv4 */
+	u_int64_t	pfsyncs_opackets6;	/* total output packets, IPv6 */
+	u_int64_t	pfsyncs_onomem;		/* no memory for an mbuf */
+	u_int64_t	pfsyncs_oerrors;	/* ip output error */
+
+	u_int64_t	pfsyncs_iacts[PFSYNC_ACT_MAX];
+	u_int64_t	pfsyncs_oacts[PFSYNC_ACT_MAX];
+};
+
+/*
+ * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC
+ */
+struct pfsyncreq {
+	char		 pfsyncr_syncdev[IFNAMSIZ];
+	struct in_addr	 pfsyncr_syncpeer;
+	int		 pfsyncr_maxupdates;
+	int		 pfsyncr_defer;
+};
+
+#define	SIOCSETPFSYNC   _IOW('i', 247, struct ifreq)
+#define	SIOCGETPFSYNC   _IOWR('i', 248, struct ifreq)
+
+#ifdef _KERNEL
+
+/*
+ * this shows where a pf state is with respect to the syncing.
+ */
+#define	PFSYNC_S_INS	0x00
+#define	PFSYNC_S_IACK	0x01
+#define	PFSYNC_S_UPD	0x02
+#define	PFSYNC_S_UPD_C	0x03
+#define	PFSYNC_S_DEL	0x04
+#define	PFSYNC_S_COUNT	0x05
+
+#define	PFSYNC_S_DEFER	0xfe
+#define	PFSYNC_S_NONE	0xff
+
+#define	PFSYNC_SI_IOCTL		0x01
+#define	PFSYNC_SI_CKSUM		0x02
+#define	PFSYNC_SI_ACK		0x04
+
+#endif /* _KERNEL */
+
+#endif /* _NET_IF_PFSYNC_H_ */


Property changes on: trunk/sys/net/if_pfsync.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/if_sppp.h
===================================================================
--- trunk/sys/net/if_sppp.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_sppp.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -18,7 +18,7 @@
  *
  * From: Version 2.0, Fri Oct  6 20:39:21 MSK 1995
  *
- * $FreeBSD: stable/9/sys/net/if_sppp.h 147256 2005-06-10 16:49:24Z brooks $
+ * $FreeBSD: stable/10/sys/net/if_sppp.h 147256 2005-06-10 16:49:24Z brooks $
  */
 
 #ifndef _NET_IF_SPPP_H_

Modified: trunk/sys/net/if_spppfr.c
===================================================================
--- trunk/sys/net/if_spppfr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_spppfr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -21,12 +21,12 @@
  * works or modified versions.
  *
  * $Cronyx Id: if_spppfr.c,v 1.1.2.10 2004/06/29 09:02:30 rik Exp $
- * $FreeBSD: stable/9/sys/net/if_spppfr.c 223741 2011-07-03 16:08:38Z bz $
+ * $FreeBSD: stable/10/sys/net/if_spppfr.c 243882 2012-12-05 08:04:20Z glebius $
  */
 
 #include <sys/param.h>
 
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipx.h"
@@ -46,7 +46,7 @@
 #include <sys/sockio.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
 #include <sys/random.h>
 #endif
 #include <sys/malloc.h>
@@ -150,7 +150,7 @@
 	unsigned short  ptarget2;
 } __packed;
 
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113
+#if defined(__FreeBSD__) && __FreeBSD_version < 501113
 #define	SPP_FMT		"%s%d: "
 #define	SPP_ARGS(ifp)	(ifp)->if_name, (ifp)->if_unit
 #else
@@ -305,7 +305,7 @@
 
 	/* Prepend the space for Frame Relay header. */
 	hlen = (family == AF_INET) ? 4 : 10;
-	M_PREPEND (m, hlen, M_DONTWAIT);
+	M_PREPEND (m, hlen, M_NOWAIT);
 	if (! m)
 		return 0;
 	h = mtod (m, u_char*);
@@ -382,7 +382,7 @@
 	unsigned char *h, *p;
 	struct mbuf *m;
 
-	MGETHDR (m, M_DONTWAIT, MT_DATA);
+	MGETHDR (m, M_NOWAIT, MT_DATA);
 	if (! m)
 		return;
 	m->m_pkthdr.rcvif = 0;
@@ -502,7 +502,7 @@
 			(unsigned char) his_ip_address);
 
 	/* Send the Inverse ARP reply. */
-	MGETHDR (m, M_DONTWAIT, MT_DATA);
+	MGETHDR (m, M_NOWAIT, MT_DATA);
 	if (! m)
 		return;
 	m->m_pkthdr.len = m->m_len = 10 + sizeof (*reply);

Modified: trunk/sys/net/if_spppsubr.c
===================================================================
--- trunk/sys/net/if_spppsubr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_spppsubr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -19,7 +19,7 @@
  *
  * From: Version 2.4, Thu Apr 30 17:17:21 MSD 1997
  *
- * $FreeBSD: stable/9/sys/net/if_spppsubr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_spppsubr.c 314667 2017-03-04 13:03:31Z avg $
  */
 
 #include <sys/param.h>
@@ -263,7 +263,7 @@
 	int debug = ifp->if_flags & IFF_DEBUG
 
 static int sppp_output(struct ifnet *ifp, struct mbuf *m,
-		       struct sockaddr *dst, struct route *ro);
+	const struct sockaddr *dst, struct route *ro);
 
 static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2);
 static void sppp_cisco_input(struct sppp *sp, struct mbuf *m);
@@ -630,7 +630,7 @@
 				 * enough leading space in the existing mbuf).
 				 */
 				m_adj(m, vjlen);
-				M_PREPEND(m, hlen, M_DONTWAIT);
+				M_PREPEND(m, hlen, M_NOWAIT);
 				if (m == NULL) {
 					SPPP_UNLOCK(sp);
 					goto drop2;
@@ -786,19 +786,18 @@
  * Enqueue transmit packet.
  */
 static int
-sppp_output(struct ifnet *ifp, struct mbuf *m,
-	    struct sockaddr *dst, struct route *ro)
+sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+	struct route *ro)
 {
 	struct sppp *sp = IFP2SP(ifp);
 	struct ppp_header *h;
 	struct ifqueue *ifq = NULL;
-	int s, error, rv = 0;
+	int error, rv = 0;
 #ifdef INET
 	int ipproto = PPP_IP;
 #endif
 	int debug = ifp->if_flags & IFF_DEBUG;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 
 	if (!(ifp->if_flags & IFF_UP) ||
@@ -809,7 +808,6 @@
 #endif
 		m_freem (m);
 		SPPP_UNLOCK(sp);
-		splx (s);
 		return (ENETDOWN);
 	}
 
@@ -833,9 +831,7 @@
 		 * to start LCP for it.
 		 */
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-		splx(s);
 		lcp.Open(sp);
-		s = splimp();
 	}
 
 #ifdef INET
@@ -859,7 +855,6 @@
 		{
 			m_freem(m);
 			SPPP_UNLOCK(sp);
-			splx(s);
 			if(ip->ip_p == IPPROTO_TCP)
 				return(EADDRNOTAVAIL);
 			else
@@ -904,7 +899,6 @@
 			default:
 				m_freem(m);
 				SPPP_UNLOCK(sp);
-				splx(s);
 				return (EINVAL);
 			}
 	}
@@ -927,7 +921,7 @@
 	/*
 	 * Prepend general data packet PPP header. For now, IP only.
 	 */
-	M_PREPEND (m, PPP_HEADER_LEN, M_DONTWAIT);
+	M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT);
 	if (! m) {
 nobufs:		if (debug)
 			log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n",
@@ -934,7 +928,6 @@
 				SPP_ARGS(ifp));
 		++ifp->if_oerrors;
 		SPPP_UNLOCK(sp);
-		splx (s);
 		return (ENOBUFS);
 	}
 	/*
@@ -1001,7 +994,6 @@
 		m_freem (m);
 		++ifp->if_oerrors;
 		SPPP_UNLOCK(sp);
-		splx (s);
 		return (EAFNOSUPPORT);
 	}
 
@@ -1017,11 +1009,9 @@
 	if (error) {
 		++ifp->if_oerrors;
 		SPPP_UNLOCK(sp);
-		splx (s);
 		return (rv? rv: ENOBUFS);
 	}
 	SPPP_UNLOCK(sp);
-	splx (s);
 	/*
 	 * Unlike in sppp_input(), we can always bump the timestamp
 	 * here since sppp_output() is only called on behalf of
@@ -1041,7 +1031,7 @@
 	mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE);
 	
 	/* Initialize keepalive handler. */
- 	callout_init(&sp->keepalive_callout, CALLOUT_MPSAFE);
+ 	callout_init(&sp->keepalive_callout, 1);
 	callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
  		    (void *)sp); 
 
@@ -1073,7 +1063,7 @@
 #ifdef INET6
 	sp->confflags |= CONF_ENABLE_IPV6;
 #endif
- 	callout_init(&sp->ifstart_callout, CALLOUT_MPSAFE);
+ 	callout_init(&sp->ifstart_callout, 1);
 	sp->if_start = ifp->if_start;
 	ifp->if_start = sppp_ifstart;
 	sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK);
@@ -1138,14 +1128,12 @@
 sppp_isempty(struct ifnet *ifp)
 {
 	struct sppp *sp = IFP2SP(ifp);
-	int empty, s;
+	int empty;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 	empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head &&
 		!SP2IFP(sp)->if_snd.ifq_head;
 	SPPP_UNLOCK(sp);
-	splx(s);
 	return (empty);
 }
 
@@ -1157,9 +1145,7 @@
 {
 	struct sppp *sp = IFP2SP(ifp);
 	struct mbuf *m;
-	int s;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 	/*
 	 * Process only the control protocol queue until we have at
@@ -1176,7 +1162,6 @@
 			IF_DEQUEUE (&SP2IFP(sp)->if_snd, m);
 	}
 	SPPP_UNLOCK(sp);
-	splx(s);
 	return m;
 }
 
@@ -1188,9 +1173,7 @@
 {
 	struct sppp *sp = IFP2SP(ifp);
 	struct mbuf *m;
-	int s;
 
-	s = splimp ();
 	SPPP_LOCK(sp);
 
 	m = sp->pp_cpq.ifq_head;
@@ -1201,7 +1184,6 @@
 		if ((m = sp->pp_fastq.ifq_head) == NULL)
 			m = SP2IFP(sp)->if_snd.ifq_head;
 	SPPP_UNLOCK(sp);
-	splx (s);
 	return (m);
 }
 
@@ -1213,14 +1195,12 @@
 {
 	struct ifreq *ifr = (struct ifreq*) data;
 	struct sppp *sp = IFP2SP(ifp);
-	int s, rv, going_up, going_down, newmode;
+	int rv, going_up, going_down, newmode;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 	rv = 0;
 	switch (cmd) {
 	case SIOCAIFADDR:
-	case SIOCSIFDSTADDR:
 		break;
 
 	case SIOCSIFADDR:
@@ -1321,7 +1301,6 @@
 		rv = ENOTTY;
 	}
 	SPPP_UNLOCK(sp);
-	splx(s);
 	return rv;
 }
 
@@ -1413,7 +1392,7 @@
 
 	getmicrouptime(&tv);
 
-	MGETHDR (m, M_DONTWAIT, MT_DATA);
+	MGETHDR (m, M_NOWAIT, MT_DATA);
 	if (! m)
 		return;
 	m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN;
@@ -1461,7 +1440,7 @@
 
 	if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN)
 		len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN;
-	MGETHDR (m, M_DONTWAIT, MT_DATA);
+	MGETHDR (m, M_NOWAIT, MT_DATA);
 	if (! m)
 		return;
 	m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
@@ -2071,9 +2050,7 @@
 sppp_to_event(const struct cp *cp, struct sppp *sp)
 {
 	STDDCL;
-	int s;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n",
@@ -2123,7 +2100,6 @@
 		}
 
 	SPPP_UNLOCK(sp);
-	splx(s);
 }
 
 /*
@@ -2195,7 +2171,7 @@
 	sp->lcp.max_terminate = 2;
 	sp->lcp.max_configure = 10;
 	sp->lcp.max_failure = 10;
- 	callout_init(&sp->ch[IDX_LCP], CALLOUT_MPSAFE);
+ 	callout_init(&sp->ch[IDX_LCP], 1);
 }
 
 static void
@@ -2886,7 +2862,7 @@
 	sp->fail_counter[IDX_IPCP] = 0;
 	sp->pp_seq[IDX_IPCP] = 0;
 	sp->pp_rseq[IDX_IPCP] = 0;
- 	callout_init(&sp->ch[IDX_IPCP], CALLOUT_MPSAFE);
+ 	callout_init(&sp->ch[IDX_IPCP], 1);
 }
 
 static void
@@ -3445,7 +3421,7 @@
 	sp->fail_counter[IDX_IPV6CP] = 0;
 	sp->pp_seq[IDX_IPV6CP] = 0;
 	sp->pp_rseq[IDX_IPV6CP] = 0;
- 	callout_init(&sp->ch[IDX_IPV6CP], CALLOUT_MPSAFE);
+ 	callout_init(&sp->ch[IDX_IPV6CP], 1);
 }
 
 static void
@@ -3620,7 +3596,7 @@
 				continue;
 			}
 
-			bzero(&suggestaddr, sizeof(&suggestaddr));
+			bzero(&suggestaddr, sizeof(suggestaddr));
 			if (collision && nohisaddr) {
 				/* collision, hisaddr unknown - Conf-Rej */
 				type = CONF_REJ;
@@ -4026,7 +4002,7 @@
 {
 	STDDCL;
 	struct lcp_header *h;
-	int len, x;
+	int len;
 	u_char *value, *name, digest[AUTHKEYLEN], dsize;
 	int value_len, name_len;
 	MD5_CTX ctx;
@@ -4103,7 +4079,6 @@
 			}
 			log(-1, "\n");
 		}
-		x = splimp();
 		SPPP_LOCK(sp);
 		sp->pp_flags &= ~PP_NEEDAUTH;
 		if (sp->myauth.proto == PPP_CHAP &&
@@ -4115,11 +4090,9 @@
 			 * to network phase.
 			 */
 			SPPP_UNLOCK(sp);
-			splx(x);
 			break;
 		}
 		SPPP_UNLOCK(sp);
-		splx(x);
 		sppp_phase_network(sp);
 		break;
 
@@ -4253,7 +4226,7 @@
 	sp->fail_counter[IDX_CHAP] = 0;
 	sp->pp_seq[IDX_CHAP] = 0;
 	sp->pp_rseq[IDX_CHAP] = 0;
- 	callout_init(&sp->ch[IDX_CHAP], CALLOUT_MPSAFE);
+ 	callout_init(&sp->ch[IDX_CHAP], 1);
 }
 
 static void
@@ -4281,9 +4254,7 @@
 {
 	struct sppp *sp = (struct sppp *)cookie;
 	STDDCL;
-	int s;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n",
@@ -4314,7 +4285,6 @@
 		}
 
 	SPPP_UNLOCK(sp);
-	splx(s);
 }
 
 static void
@@ -4321,7 +4291,7 @@
 sppp_chap_tlu(struct sppp *sp)
 {
 	STDDCL;
-	int i, x;
+	int i;
 
 	i = 0;
 	sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
@@ -4352,7 +4322,6 @@
 			log(-1, "re-challenging supressed\n");
 	}
 
-	x = splimp();
 	SPPP_LOCK(sp);
 	/* indicate to LCP that we need to be closed down */
 	sp->lcp.protos |= (1 << IDX_CHAP);
@@ -4364,11 +4333,9 @@
 		 * phase.
 		 */
 		SPPP_UNLOCK(sp);
-		splx(x);
 		return;
 	}
 	SPPP_UNLOCK(sp);
-	splx(x);
 
 	/*
 	 * If we are already in phase network, we are done here.  This
@@ -4437,7 +4404,7 @@
 {
 	STDDCL;
 	struct lcp_header *h;
-	int len, x;
+	int len;
 	u_char *name, *passwd, mlen;
 	int name_len, passwd_len;
 
@@ -4524,7 +4491,6 @@
 			}
 			log(-1, "\n");
 		}
-		x = splimp();
 		SPPP_LOCK(sp);
 		sp->pp_flags &= ~PP_NEEDAUTH;
 		if (sp->myauth.proto == PPP_PAP &&
@@ -4536,11 +4502,9 @@
 			 * to network phase.
 			 */
 			SPPP_UNLOCK(sp);
-			splx(x);
 			break;
 		}
 		SPPP_UNLOCK(sp);
-		splx(x);
 		sppp_phase_network(sp);
 		break;
 
@@ -4584,8 +4548,8 @@
 	sp->fail_counter[IDX_PAP] = 0;
 	sp->pp_seq[IDX_PAP] = 0;
 	sp->pp_rseq[IDX_PAP] = 0;
- 	callout_init(&sp->ch[IDX_PAP], CALLOUT_MPSAFE);
- 	callout_init(&sp->pap_my_to_ch, CALLOUT_MPSAFE);
+ 	callout_init(&sp->ch[IDX_PAP], 1);
+ 	callout_init(&sp->pap_my_to_ch, 1);
 }
 
 static void
@@ -4621,9 +4585,7 @@
 {
 	struct sppp *sp = (struct sppp *)cookie;
 	STDDCL;
-	int s;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n",
@@ -4649,7 +4611,6 @@
 		}
 
 	SPPP_UNLOCK(sp);
-	splx(s);
 }
 
 /*
@@ -4676,7 +4637,6 @@
 sppp_pap_tlu(struct sppp *sp)
 {
 	STDDCL;
-	int x;
 
 	sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
 
@@ -4684,7 +4644,6 @@
 		log(LOG_DEBUG, SPP_FMT "%s tlu\n",
 		    SPP_ARGS(ifp), pap.name);
 
-	x = splimp();
 	SPPP_LOCK(sp);
 	/* indicate to LCP that we need to be closed down */
 	sp->lcp.protos |= (1 << IDX_PAP);
@@ -4696,11 +4655,9 @@
 		 * phase.
 		 */
 		SPPP_UNLOCK(sp);
-		splx(x);
 		return;
 	}
 	SPPP_UNLOCK(sp);
-	splx(x);
 	sppp_phase_network(sp);
 }
 
@@ -4765,7 +4722,7 @@
 	const char *msg;
 	va_list ap;
 
-	MGETHDR (m, M_DONTWAIT, MT_DATA);
+	MGETHDR (m, M_NOWAIT, MT_DATA);
 	if (! m)
 		return;
 	m->m_pkthdr.rcvif = 0;
@@ -4822,7 +4779,7 @@
 
 	n = ifq->ifq_head;
 	while ((m = n)) {
-		n = m->m_act;
+		n = m->m_nextpkt;
 		m_freem (m);
 	}
 	ifq->ifq_head = 0;
@@ -4838,9 +4795,7 @@
 {
 	struct sppp *sp = (struct sppp*)dummy;
 	struct ifnet *ifp = SP2IFP(sp);
-	int s;
 
-	s = splimp();
 	SPPP_LOCK(sp);
 	/* Keepalive mode disabled or channel down? */
 	if (! (sp->pp_flags & PP_KEEPALIVE) ||
@@ -4883,7 +4838,6 @@
 	}
 out:
 	SPPP_UNLOCK(sp);
-	splx(s);
  	callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
 		      (void *)sp);
 }
@@ -4933,7 +4887,7 @@
 
 #ifdef INET
 /*
- * Set my IP address.  Must be called at splimp.
+ * Set my IP address.
  */
 static void
 sppp_set_ip_addr(struct sppp *sp, u_long src)
@@ -5050,7 +5004,7 @@
 }
 
 /*
- * Set my IPv6 address.  Must be called at splimp.
+ * Set my IPv6 address.
  */
 static void
 sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src)
@@ -5132,7 +5086,8 @@
 	 * Check the cmd word first before attempting to fetch all the
 	 * data.
 	 */
-	if ((subcmd = fuword(ifr->ifr_data)) == -1) {
+	rv = fueword(ifr->ifr_data, &subcmd);
+	if (rv == -1) {
 		rv = EFAULT;
 		goto quit;
 	}

Modified: trunk/sys/net/if_stf.c
===================================================================
--- trunk/sys/net/if_stf.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_stf.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/net/if_stf.c 248743 2013-03-26 18:57:25Z melifaro $	*/
+/*	$FreeBSD: stable/10/sys/net/if_stf.c 275828 2014-12-16 11:53:45Z ae $	*/
 /*	$KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $	*/
 
 /*-
@@ -133,7 +133,6 @@
 SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN,
     &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses");
 
-#define STFNAME		"stf"
 #define STFUNIT		0
 
 #define IN6_IS_ADDR_6TO4(x)	(ntohs((x)->s6_addr16[0]) == 0x2002)
@@ -142,7 +141,7 @@
  * XXX: Return a pointer with 16-bit aligned.  Don't cast it to
  * struct in_addr *; use bcopy() instead.
  */
-#define GET_V4(x)	((caddr_t)(&(x)->s6_addr16[1]))
+#define GET_V4(x)	(&(x)->s6_addr16[1])
 
 struct stf_softc {
 	struct ifnet	*sc_ifp;
@@ -157,11 +156,13 @@
 };
 #define STF2IFP(sc)	((sc)->sc_ifp)
 
+static const char stfname[] = "stf";
+
 /*
  * Note that mutable fields in the softc are not currently locked.
  * We do lock sc_ro in stf_output though.
  */
-static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
+static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
 static const int ip_stf_ttl = 40;
 
 extern  struct domain inetdomain;
@@ -181,7 +182,7 @@
 static int stfmodevent(module_t, int, void *);
 static int stf_encapcheck(const struct mbuf *, int, int, void *);
 static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *);
-static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
 	struct route *);
 static int isrfc1918addr(struct in_addr *);
 static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
@@ -194,8 +195,7 @@
 static int stf_clone_match(struct if_clone *, const char *);
 static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int stf_clone_destroy(struct if_clone *, struct ifnet *);
-struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0,
-    NULL, stf_clone_match, stf_clone_create, stf_clone_destroy);
+static struct if_clone *stf_cloner;
 
 static int
 stf_clone_match(struct if_clone *ifc, const char *name)
@@ -242,7 +242,7 @@
 	 * we don't conform to the default naming convention for interfaces.
 	 */
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
-	ifp->if_dname = ifc->ifc_name;
+	ifp->if_dname = stfname;
 	ifp->if_dunit = IF_DUNIT_NONE;
 
 	mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF);
@@ -292,10 +292,11 @@
 
 	switch (type) {
 	case MOD_LOAD:
-		if_clone_attach(&stf_cloner);
+		stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match,
+		    stf_clone_create, stf_clone_destroy);
 		break;
 	case MOD_UNLOAD:
-		if_clone_detach(&stf_cloner);
+		if_clone_detach(stf_cloner);
 		break;
 	default:
 		return (EOPNOTSUPP);
@@ -413,23 +414,19 @@
 }
 
 static int
-stf_output(ifp, m, dst, ro)
-	struct ifnet *ifp;
-	struct mbuf *m;
-	struct sockaddr *dst;
-	struct route *ro;
+stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+	struct route *ro)
 {
 	struct stf_softc *sc;
-	struct sockaddr_in6 *dst6;
+	const struct sockaddr_in6 *dst6;
 	struct route *cached_route;
 	struct in_addr in4;
-	caddr_t ptr;
+	const void *ptr;
 	struct sockaddr_in *dst4;
 	u_int8_t tos;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	struct in6_ifaddr *ia6;
-	u_int32_t af;
 	int error;
 
 #ifdef MAC
@@ -441,7 +438,7 @@
 #endif
 
 	sc = ifp->if_softc;
-	dst6 = (struct sockaddr_in6 *)dst;
+	dst6 = (const struct sockaddr_in6 *)dst;
 
 	/* just in case */
 	if ((ifp->if_flags & IFF_UP) == 0) {
@@ -474,15 +471,6 @@
 	tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 
 	/*
-	 * BPF writes need to be handled specially.
-	 * This is a null operation, nothing here checks dst->sa_family.
-	 */
-	if (dst->sa_family == AF_UNSPEC) {
-		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af;
-	}
-
-	/*
 	 * Pickup the right outer dst addr from the list of candidates.
 	 * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
 	 */
@@ -507,13 +495,11 @@
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer a to it).
 		 */
-		af = AF_INET6;
+		u_int af = AF_INET6;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
-	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
-	if (m && m->m_len < sizeof(struct ip))
-		m = m_pullup(m, sizeof(struct ip));
+	M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 	if (m == NULL) {
 		ifa_free(&ia6->ia_ifa);
 		ifp->if_oerrors++;
@@ -529,7 +515,7 @@
 	bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
 	ip->ip_p = IPPROTO_IPV6;
 	ip->ip_ttl = ip_stf_ttl;
-	ip->ip_len = m->m_pkthdr.len;	/*host order*/
+	ip->ip_len = htons(m->m_pkthdr.len);
 	if (ifp->if_flags & IFF_LINK1)
 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
 	else
@@ -625,10 +611,7 @@
 	 * reject packets with broadcast
 	 */
 	IN_IFADDR_RLOCK();
-	for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead);
-	     ia4;
-	     ia4 = TAILQ_NEXT(ia4, ia_link))
-	{
+	TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
 		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
 			continue;
 		if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
@@ -800,7 +783,7 @@
 	struct rt_addrinfo *info;
 {
 	RT_LOCK_ASSERT(rt);
-	rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+	rt->rt_mtu = rt->rt_ifp->if_mtu;
 }
 
 static int

Modified: trunk/sys/net/if_stf.h
===================================================================
--- trunk/sys/net/if_stf.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_stf.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/net/if_stf.h 139823 2005-01-07 01:45:51Z imp $	*/
+/*	$FreeBSD: stable/10/sys/net/if_stf.h 139823 2005-01-07 01:45:51Z imp $	*/
 /*	$KAME: if_stf.h,v 1.5 2001/10/12 10:09:17 keiichi Exp $	*/
 
 /*-

Modified: trunk/sys/net/if_tap.c
===================================================================
--- trunk/sys/net/if_tap.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tap.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -32,7 +32,7 @@
  */
 
 /*
- * $FreeBSD: stable/9/sys/net/if_tap.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_tap.c 326692 2017-12-08 15:26:57Z hselasky $
  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
  */
 
@@ -80,8 +80,8 @@
 #define CDEV_NAME	"tap"
 #define TAPDEBUG	if (tapdebug) printf
 
-#define TAP		"tap"
-#define VMNET		"vmnet"
+static const char tapname[] = "tap";
+static const char vmnetname[] = "vmnet";
 #define TAPMAXUNIT	0x7fff
 #define VMNET_DEV_MASK	CLONE_FLAG0
 
@@ -100,12 +100,11 @@
 
 static int		tap_clone_create(struct if_clone *, int, caddr_t);
 static void		tap_clone_destroy(struct ifnet *);
+static struct if_clone *tap_cloner;
 static int		vmnet_clone_create(struct if_clone *, int, caddr_t);
 static void		vmnet_clone_destroy(struct ifnet *);
+static struct if_clone *vmnet_cloner;
 
-IFC_SIMPLE_DECLARE(tap, 0);
-IFC_SIMPLE_DECLARE(vmnet, 0);
-
 /* character device */
 static d_open_t		tapopen;
 static d_close_t	tapclose;
@@ -136,7 +135,7 @@
 
 static struct cdevsw	tap_cdevsw = {
 	.d_version =	D_VERSION,
-	.d_flags =	D_PSEUDO | D_NEEDMINOR,
+	.d_flags =	D_NEEDMINOR,
 	.d_open =	tapopen,
 	.d_close =	tapclose,
 	.d_read =	tapread,
@@ -184,18 +183,12 @@
 {
 	struct cdev *dev;
 	int i;
-	int extra;
 
-	if (strcmp(ifc->ifc_name, VMNET) == 0)
-		extra = VMNET_DEV_MASK;
-	else
-		extra = 0;
-
-	/* find any existing device, or allocate new unit number */
-	i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra);
+	/* Find any existing device, or allocate new unit number. */
+	i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0);
 	if (i) {
-		dev = make_dev(&tap_cdevsw, unit | extra,
-		     UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit);
+		dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
+		    "%s%d", tapname, unit);
 	}
 
 	tapcreate(dev);
@@ -206,7 +199,18 @@
 static int
 vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
-	return tap_clone_create(ifc, unit, params);
+	struct cdev *dev;
+	int i;
+
+	/* Find any existing device, or allocate new unit number. */
+	i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK);
+	if (i) {
+		dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT,
+		    GID_WHEEL, 0600, "%s%d", vmnetname, unit);
+	}
+
+	tapcreate(dev);
+	return (0);
 }
 
 static void
@@ -214,16 +218,13 @@
 {
 	struct ifnet *ifp = tp->tap_ifp;
 
-	/* Unlocked read. */
-	KASSERT(!(tp->tap_flags & TAP_OPEN),
-		("%s flags is out of sync", ifp->if_xname));
-
 	CURVNET_SET(ifp->if_vnet);
+	destroy_dev(tp->tap_dev);
 	seldrain(&tp->tap_rsel);
+	knlist_clear(&tp->tap_rsel.si_note, 0);
 	knlist_destroy(&tp->tap_rsel.si_note);
-	destroy_dev(tp->tap_dev);
 	ether_ifdetach(ifp);
-	if_free_type(ifp, IFT_ETHER);
+	if_free(ifp);
 
 	mtx_destroy(&tp->tap_mtx);
 	free(tp, M_TAP);
@@ -275,8 +276,10 @@
 			mtx_destroy(&tapmtx);
 			return (ENOMEM);
 		}
-		if_clone_attach(&tap_cloner);
-		if_clone_attach(&vmnet_cloner);
+		tap_cloner = if_clone_simple(tapname, tap_clone_create,
+		    tap_clone_destroy, 0);
+		vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create,
+		    vmnet_clone_destroy, 0);
 		return (0);
 
 	case MOD_UNLOAD:
@@ -298,8 +301,8 @@
 		mtx_unlock(&tapmtx);
 
 		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
-		if_clone_detach(&tap_cloner);
-		if_clone_detach(&vmnet_cloner);
+		if_clone_detach(tap_cloner);
+		if_clone_detach(vmnet_cloner);
 		drain_dev_clone_events();
 
 		mtx_lock(&tapmtx);
@@ -353,13 +356,13 @@
 	extra = 0;
 
 	/* We're interested in only tap/vmnet devices. */
-	if (strcmp(name, TAP) == 0) {
+	if (strcmp(name, tapname) == 0) {
 		unit = -1;
-	} else if (strcmp(name, VMNET) == 0) {
+	} else if (strcmp(name, vmnetname) == 0) {
 		unit = -1;
 		extra = VMNET_DEV_MASK;
-	} else if (dev_stdclone(name, NULL, TAP, &unit) != 1) {
-		if (dev_stdclone(name, NULL, VMNET, &unit) != 1) {
+	} else if (dev_stdclone(name, NULL, tapname, &unit) != 1) {
+		if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) {
 			return;
 		} else {
 			extra = VMNET_DEV_MASK;
@@ -405,11 +408,9 @@
 	unsigned short		 macaddr_hi;
 	uint32_t		 macaddr_mid;
 	int			 unit;
-	char			*name = NULL;
+	const char		*name = NULL;
 	u_char			eaddr[6];
 
-	dev->si_flags &= ~SI_CHEAPCLONE;
-
 	/* allocate driver storage and create device */
 	tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
 	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
@@ -421,10 +422,10 @@
 
 	/* select device: tap or vmnet */
 	if (unit & VMNET_DEV_MASK) {
-		name = VMNET;
+		name = vmnetname;
 		tp->tap_flags |= TAP_VMNET;
 	} else
-		name = TAP;
+		name = tapname;
 
 	unit &= TAPMAXUNIT;
 
@@ -736,9 +737,10 @@
 	switch (cmd) {
 		case TAPSIFINFO:
 			tapp = (struct tapinfo *)data;
+			if (ifp->if_type != tapp->type)
+				return (EPROTOTYPE);
 			mtx_lock(&tp->tap_mtx);
 			ifp->if_mtu = tapp->mtu;
-			ifp->if_type = tapp->type;
 			ifp->if_baudrate = tapp->baudrate;
 			mtx_unlock(&tp->tap_mtx);
 			break;
@@ -947,7 +949,7 @@
 		return (EIO);
 	}
 
-	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN,
+	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN,
 	    M_PKTHDR)) == NULL) {
 		ifp->if_ierrors ++;
 		return (ENOBUFS);

Modified: trunk/sys/net/if_tap.h
===================================================================
--- trunk/sys/net/if_tap.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tap.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -32,7 +32,7 @@
  */
 
 /*
- * $FreeBSD: stable/9/sys/net/if_tap.h 182880 2008-09-08 22:43:55Z emax $
+ * $FreeBSD: stable/10/sys/net/if_tap.h 182880 2008-09-08 22:43:55Z emax $
  * $Id: if_tap.h,v 0.7 2000/07/12 04:12:51 max Exp $
  */
 

Modified: trunk/sys/net/if_tapvar.h
===================================================================
--- trunk/sys/net/if_tapvar.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tapvar.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
  */
 
 /*
- * $FreeBSD: stable/9/sys/net/if_tapvar.h 147256 2005-06-10 16:49:24Z brooks $
+ * $FreeBSD: stable/10/sys/net/if_tapvar.h 240942 2012-09-25 23:41:45Z emaste $
  * $Id: if_tapvar.h,v 0.6 2000/07/11 02:16:08 max Exp $
  */
 

Modified: trunk/sys/net/if_tun.c
===================================================================
--- trunk/sys/net/if_tun.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tun.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -14,7 +14,7 @@
  * UCL. This driver is based much more on read/write/poll mode of
  * operation though.
  *
- * $FreeBSD: stable/9/sys/net/if_tun.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_tun.c 326692 2017-12-08 15:26:57Z hselasky $
  */
 
 #include "opt_atalk.h"
@@ -100,7 +100,6 @@
 #define TUN2IFP(sc)	((sc)->tun_ifp)
 
 #define TUNDEBUG	if (tundebug) if_printf
-#define	TUNNAME		"tun"
 
 /*
  * All mutable global variables in if_tun are locked using tunmtx, with
@@ -108,7 +107,8 @@
  * which is static after setup.
  */
 static struct mtx tunmtx;
-static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
+static const char tunname[] = "tun";
+static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
 static int tundebug = 0;
 static int tundclone = 1;
 static struct clonedevs *tunclones;
@@ -129,15 +129,14 @@
 static int	tunifioctl(struct ifnet *, u_long, caddr_t);
 static void	tuninit(struct ifnet *);
 static int	tunmodevent(module_t, int, void *);
-static int	tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
-		    struct route *ro);
+static int	tunoutput(struct ifnet *, struct mbuf *,
+		    const struct sockaddr *, struct route *ro);
 static void	tunstart(struct ifnet *);
 
 static int	tun_clone_create(struct if_clone *, int, caddr_t);
 static void	tun_clone_destroy(struct ifnet *);
+static struct if_clone *tun_cloner;
 
-IFC_SIMPLE_DECLARE(tun, 0);
-
 static d_open_t		tunopen;
 static d_close_t	tunclose;
 static d_read_t		tunread;
@@ -166,7 +165,7 @@
 
 static struct cdevsw tun_cdevsw = {
 	.d_version =	D_VERSION,
-	.d_flags =	D_PSEUDO | D_NEEDMINOR,
+	.d_flags =	D_NEEDMINOR,
 	.d_open =	tunopen,
 	.d_close =	tunclose,
 	.d_read =	tunread,
@@ -174,7 +173,7 @@
 	.d_ioctl =	tunioctl,
 	.d_poll =	tunpoll,
 	.d_kqfilter =	tunkqfilter,
-	.d_name =	TUNNAME,
+	.d_name =	tunname,
 };
 
 static int
@@ -188,9 +187,9 @@
 	if (i) {
 		/* No preexisting struct cdev *, create one */
 		dev = make_dev(&tun_cdevsw, unit,
-		    UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit);
+		    UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
 	}
-	tuncreate(ifc->ifc_name, dev);
+	tuncreate(tunname, dev);
 
 	return (0);
 }
@@ -212,9 +211,9 @@
 	if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
 		return;
 
-	if (strcmp(name, TUNNAME) == 0) {
+	if (strcmp(name, tunname) == 0) {
 		u = -1;
-	} else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
+	} else if (dev_stdclone(name, NULL, tunname, &u) != 1)
 		return;	/* Don't recognise the name */
 	if (u != -1 && u > IF_MAXUNIT)
 		return;	/* Unit number too high */
@@ -247,7 +246,6 @@
 {
 	struct cdev *dev;
 
-	/* Unlocked read. */
 	mtx_lock(&tp->tun_mtx);
 	if ((tp->tun_flags & TUN_OPEN) != 0)
 		cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
@@ -261,6 +259,7 @@
 	if_free(TUN2IFP(tp));
 	destroy_dev(dev);
 	seldrain(&tp->tun_rsel);
+	knlist_clear(&tp->tun_rsel.si_note, 0);
 	knlist_destroy(&tp->tun_rsel.si_note);
 	mtx_destroy(&tp->tun_mtx);
 	cv_destroy(&tp->tun_cv);
@@ -292,10 +291,11 @@
 		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
 		if (tag == NULL)
 			return (ENOMEM);
-		if_clone_attach(&tun_cloner);
+		tun_cloner = if_clone_simple(tunname, tun_clone_create,
+		    tun_clone_destroy, 0);
 		break;
 	case MOD_UNLOAD:
-		if_clone_detach(&tun_cloner);
+		if_clone_detach(tun_cloner);
 		EVENTHANDLER_DEREGISTER(dev_clone, tag);
 		drain_dev_clone_events();
 
@@ -323,6 +323,7 @@
 };
 
 DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_tun, 1);
 
 static void
 tunstart(struct ifnet *ifp)
@@ -362,8 +363,6 @@
 	struct tun_softc *sc;
 	struct ifnet *ifp;
 
-	dev->si_flags &= ~SI_CHEAPCLONE;
-
 	sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
 	mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
 	cv_init(&sc->tun_cv, "tun_condvar");
@@ -410,7 +409,7 @@
 	 */
 	tp = dev->si_drv1;
 	if (!tp) {
-		tuncreate(TUNNAME, dev);
+		tuncreate(tunname, dev);
 		tp = dev->si_drv1;
 	}
 
@@ -555,10 +554,6 @@
 		tuninit(ifp);
 		TUNDEBUG(ifp, "address set\n");
 		break;
-	case SIOCSIFDSTADDR:
-		tuninit(ifp);
-		TUNDEBUG(ifp, "destination address set\n");
-		break;
 	case SIOCSIFMTU:
 		ifp->if_mtu = ifr->ifr_mtu;
 		TUNDEBUG(ifp, "mtu set\n");
@@ -577,7 +572,7 @@
  * tunoutput - queue packets from higher level ready to put out.
  */
 static int
-tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct tun_softc *tp = ifp->if_softc;
@@ -611,20 +606,18 @@
 	}
 
 	/* BPF writes need to be handled specially. */
-	if (dst->sa_family == AF_UNSPEC) {
+	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af; 
-	}
+	else
+		af = dst->sa_family;
 
-	if (bpf_peers_present(ifp->if_bpf)) {
-		af = dst->sa_family;
+	if (bpf_peers_present(ifp->if_bpf))
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
-	}
 
 	/* prepend sockaddr? this may abort if the mbuf allocation fails */
 	if (cached_tun_flags & TUN_LMODE) {
 		/* allocate space for sockaddr */
-		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
+		M_PREPEND(m0, dst->sa_len, M_NOWAIT);
 
 		/* if allocation failed drop packet */
 		if (m0 == NULL) {
@@ -638,7 +631,7 @@
 
 	if (cached_tun_flags & TUN_IFHEAD) {
 		/* Prepend the address family */
-		M_PREPEND(m0, 4, M_DONTWAIT);
+		M_PREPEND(m0, 4, M_NOWAIT);
 
 		/* if allocation failed drop packet */
 		if (m0 == NULL) {
@@ -646,10 +639,10 @@
 			ifp->if_oerrors++;
 			return (ENOBUFS);
 		} else
-			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
+			*(u_int32_t *)m0->m_data = htonl(af);
 	} else {
 #ifdef INET
-		if (dst->sa_family != AF_INET)
+		if (af != AF_INET)
 #endif
 		{
 			m_freem(m0);
@@ -685,9 +678,10 @@
 			if (error)
 				return (error);
 		}
+		if (TUN2IFP(tp)->if_type != tunp->type)
+			return (EPROTOTYPE);
 		mtx_lock(&tp->tun_mtx);
 		TUN2IFP(tp)->if_mtu = tunp->mtu;
-		TUN2IFP(tp)->if_type = tunp->type;
 		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
 		mtx_unlock(&tp->tun_mtx);
 		break;
@@ -874,7 +868,7 @@
 		return (EIO);
 	}
 
-	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
+	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
 		ifp->if_ierrors++;
 		return (ENOBUFS);
 	}
@@ -925,9 +919,8 @@
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
-	/* First chunk of an mbuf contains good junk */
 	if (harvest.point_to_point)
-		random_harvest(m, 16, 3, 0, RANDOM_NET);
+		random_harvest(&(m->m_data), 12, 2, RANDOM_NET_TUN);
 	ifp->if_ibytes += m->m_pkthdr.len;
 	ifp->if_ipackets++;
 	CURVNET_SET(ifp->if_vnet);

Modified: trunk/sys/net/if_tun.h
===================================================================
--- trunk/sys/net/if_tun.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tun.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -14,7 +14,7 @@
  * UCL. This driver is based much more on read/write/select mode of
  * operation though.
  *
- * $FreeBSD: stable/9/sys/net/if_tun.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/if_tun.h 139823 2005-01-07 01:45:51Z imp $
  */
 
 #ifndef _NET_IF_TUN_H_

Modified: trunk/sys/net/if_types.h
===================================================================
--- trunk/sys/net/if_types.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_types.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_types.h	8.3 (Berkeley) 4/28/95
- * $FreeBSD: stable/9/sys/net/if_types.h 219819 2011-03-21 09:40:01Z jeff $
+ * $FreeBSD: stable/10/sys/net/if_types.h 228571 2011-12-16 12:16:56Z glebius $
  * $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $
  */
 
@@ -251,6 +251,5 @@
 #define	IFT_ENC		0xf4
 #define	IFT_PFLOG	0xf6
 #define	IFT_PFSYNC	0xf7
-#define	IFT_CARP	0xf8	/* Common Address Redundancy Protocol */
 #define IFT_IPXIP	0xf9	/* IPX over IP tunneling; no longer used. */
 #endif /* !_NET_IF_TYPES_H_ */

Modified: trunk/sys/net/if_var.h
===================================================================
--- trunk/sys/net/if_var.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_var.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	From: @(#)if.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: release/9.2.0/sys/net/if_var.h 252781 2013-07-05 13:48:32Z andre $
+ * $FreeBSD: stable/10/sys/net/if_var.h 318505 2017-05-18 23:41:34Z rpokala $
  */
 
 #ifndef	_NET_IF_VAR_H_
@@ -70,6 +70,7 @@
 struct	socket;
 struct	ether_header;
 struct	carp_if;
+struct	carp_softc;
 struct  ifvlantrunk;
 struct	route;
 struct	vnet;
@@ -96,10 +97,30 @@
 
 TAILQ_HEAD(ifnethead, ifnet);	/* we use TAILQs so that the order of */
 TAILQ_HEAD(ifaddrhead, ifaddr);	/* instantiation is preserved in the list */
-TAILQ_HEAD(ifprefixhead, ifprefix);
 TAILQ_HEAD(ifmultihead, ifmultiaddr);
 TAILQ_HEAD(ifgrouphead, ifg_group);
 
+#ifdef _KERNEL
+VNET_DECLARE(struct pfil_head, link_pfil_hook);	/* packet filter hooks */
+#define	V_link_pfil_hook	VNET(link_pfil_hook)
+#endif /* _KERNEL */
+
+typedef enum {
+	IFCOUNTER_IPACKETS = 0,
+	IFCOUNTER_IERRORS,
+	IFCOUNTER_OPACKETS,
+	IFCOUNTER_OERRORS,
+	IFCOUNTER_COLLISIONS,
+	IFCOUNTER_IBYTES,
+	IFCOUNTER_OBYTES,
+	IFCOUNTER_IMCASTS,
+	IFCOUNTER_OMCASTS,
+	IFCOUNTER_IQDROPS,
+	IFCOUNTER_OQDROPS,
+	IFCOUNTER_NOPROTO,
+	IFCOUNTERS /* Array size. */
+} ift_counter;
+
 /*
  * Structure defining a queue for a network interface.
  */
@@ -112,6 +133,12 @@
 	struct	mtx ifq_mtx;
 };
 
+struct ifnet_hw_tsomax {
+	u_int	tsomaxbytes;	/* TSO total burst length limit in bytes */
+	u_int	tsomaxsegcount;	/* TSO maximum segment count */
+	u_int	tsomaxsegsize;	/* TSO maximum segment size in bytes */
+};
+
 /*
  * Structure defining a network interface.
  *
@@ -154,7 +181,7 @@
 	int	if_amcount;		/* number of all-multicast requests */
 /* procedure handles */
 	int	(*if_output)		/* output routine (enqueue) */
-		(struct ifnet *, struct mbuf *, struct sockaddr *,
+		(struct ifnet *, struct mbuf *, const struct sockaddr *,
 		     struct route *);
 	void	(*if_input)		/* input routine (from h/w driver) */
 		(struct ifnet *, struct mbuf *);
@@ -184,16 +211,16 @@
 	struct	label *if_label;	/* interface MAC label */
 
 	/* these are only used by IPv6 */
-	struct	ifprefixhead if_prefixhead; /* list of prefixes per if */
+	void	*if_unused[2];
 	void	*if_afdata[AF_MAX];
 	int	if_afdata_initialized;
 	struct	rwlock if_afdata_lock;
 	struct	task if_linktask;	/* task for link change events */
-	struct	mtx if_addr_mtx;	/* mutex to protect address lists */
+	struct	rwlock if_addr_lock;	/* lock to protect address lists */
 
 	LIST_ENTRY(ifnet) if_clones;	/* interfaces of a cloner */
 	TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
-					/* protected by if_addr_mtx */
+					/* protected by if_addr_lock */
 	void	*if_pf_kif;
 	void	*if_lagg;		/* lagg glue */
 	char	*if_description;	/* interface description */
@@ -201,17 +228,40 @@
 	u_char	if_alloctype;		/* if_type at time of allocation */
 
 	/*
+	 * Network adapter TSO limits:
+	 * ===========================
+	 *
+	 * If the "if_hw_tsomax" field is zero the maximum segment
+	 * length limit does not apply. If the "if_hw_tsomaxsegcount"
+	 * or the "if_hw_tsomaxsegsize" field is zero the TSO segment
+	 * count limit does not apply. If all three fields are zero,
+	 * there is no TSO limit.
+	 *
+	 * NOTE: The TSO limits should reflect the values used in the
+	 * BUSDMA tag a network adapter is using to load a mbuf chain
+	 * for transmission. The TCP/IP network stack will subtract
+	 * space for all linklevel and protocol level headers and
+	 * ensure that the full mbuf chain passed to the network
+	 * adapter fits within the given limits.
+	 */
+	u_int	if_hw_tsomax;
+
+	/*
 	 * Spare fields are added so that we can modify sensitive data
 	 * structures without changing the kernel binary interface, and must
 	 * be used with care where binary compatibility is required.
 	 */
 	char	if_cspare[3];
-	u_int	if_hw_tsomax;		/* tso burst length limit, the minmum
-					 * is (IP_MAXPACKET / 8).
-					 * XXXAO: Have to find a better place
-					 * for it eventually. */
-	int	if_ispare[3];
-	void	*if_pspare[8];		/* 1 netmap, 7 TDB */
+	int	if_ispare[2];
+
+	/*
+	 * TSO fields for segment limits. If a field is zero below,
+	 * there is no limit:
+	 */
+	u_int	if_hw_tsomaxsegcount;	/* TSO maximum segment count */
+	u_int	if_hw_tsomaxsegsize;	/* TSO maximum segment size in bytes */
+	void	*if_pspare[7];		/* 1 netmap, 6 TDB */
+	void	*if_hw_addr;		/* hardware link-level address */
 };
 
 typedef void if_init_f_t(void *);
@@ -228,6 +278,7 @@
 #define	if_metric	if_data.ifi_metric
 #define	if_link_state	if_data.ifi_link_state
 #define	if_baudrate	if_data.ifi_baudrate
+#define	if_baudrate_pf	if_data.ifi_baudrate_pf
 #define	if_hwassist	if_data.ifi_hwassist
 #define	if_ipackets	if_data.ifi_ipackets
 #define	if_ierrors	if_data.ifi_ierrors
@@ -250,18 +301,14 @@
 /*
  * Locks for address lists on the network interface.
  */
-#define	IF_ADDR_LOCK_INIT(if)	mtx_init(&(if)->if_addr_mtx,		\
-				    "if_addr_mtx", NULL, MTX_DEF)
-#define	IF_ADDR_LOCK_DESTROY(if)	mtx_destroy(&(if)->if_addr_mtx)
-#define	IF_ADDR_WLOCK(if)	mtx_lock(&(if)->if_addr_mtx)
-#define	IF_ADDR_WUNLOCK(if)	mtx_unlock(&(if)->if_addr_mtx)
-#define	IF_ADDR_RLOCK(if)	mtx_lock(&(if)->if_addr_mtx)
-#define	IF_ADDR_RUNLOCK(if)	mtx_unlock(&(if)->if_addr_mtx)
-#define	IF_ADDR_LOCK_ASSERT(if)	mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-#define	IF_ADDR_WLOCK_ASSERT(if)	mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-/* XXX: Compat. */
-#define	IF_ADDR_LOCK(if)	IF_ADDR_WLOCK(if)
-#define	IF_ADDR_UNLOCK(if)	IF_ADDR_WUNLOCK(if)
+#define	IF_ADDR_LOCK_INIT(if)	rw_init(&(if)->if_addr_lock, "if_addr_lock")
+#define	IF_ADDR_LOCK_DESTROY(if)	rw_destroy(&(if)->if_addr_lock)
+#define	IF_ADDR_WLOCK(if)	rw_wlock(&(if)->if_addr_lock)
+#define	IF_ADDR_WUNLOCK(if)	rw_wunlock(&(if)->if_addr_lock)
+#define	IF_ADDR_RLOCK(if)	rw_rlock(&(if)->if_addr_lock)
+#define	IF_ADDR_RUNLOCK(if)	rw_runlock(&(if)->if_addr_lock)
+#define	IF_ADDR_LOCK_ASSERT(if)	rw_assert(&(if)->if_addr_lock, RA_LOCKED)
+#define	IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED)
 
 /*
  * Function variations on locking macros intended to be used by loadable
@@ -277,7 +324,7 @@
  * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
  * are queues of messages stored on ifqueue structures
  * (defined above).  Entries are added to and deleted from these structures
- * by these macros, which should be called with ipl raised to splimp().
+ * by these macros.
  */
 #define IF_LOCK(ifq)		mtx_lock(&(ifq)->ifq_mtx)
 #define IF_UNLOCK(ifq)		mtx_unlock(&(ifq)->ifq_mtx)
@@ -379,6 +426,11 @@
 /* Interface link state change event */
 typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int);
 EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
+/* Interface up/down event */
+#define IFNET_EVENT_UP		0
+#define IFNET_EVENT_DOWN	1
+typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event);
+EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn);
 
 /*
  * interface groups
@@ -594,6 +646,18 @@
 } while (0)
 
 #ifdef _KERNEL
+static __inline void
+if_initbaudrate(struct ifnet *ifp, uintmax_t baud)
+{
+
+	ifp->if_baudrate_pf = 0;
+	while (baud > (u_long)(~0UL)) {
+		baud /= 10;
+		ifp->if_baudrate_pf++;
+	}
+	ifp->if_baudrate = baud;
+}
+
 static __inline int
 drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
 {	
@@ -648,7 +712,7 @@
 		return (m);
 	}
 #endif
-	return(buf_ring_peek(br));
+	return(buf_ring_peek_clear_sc(br));
 }
 
 static __inline void
@@ -781,6 +845,7 @@
 	struct	sockaddr *ifa_netmask;	/* used to determine subnet */
 	struct	if_data if_data;	/* not all members are meaningful */
 	struct	ifnet *ifa_ifp;		/* back-pointer to interface */
+	struct	carp_softc *ifa_carp;	/* pointer to CARP data */
 	TAILQ_ENTRY(ifaddr) ifa_link;	/* queue macro glue */
 	void	(*ifa_rtrequest)	/* check or clean routes (+ or -)'d */
 		(int, struct rtentry *, struct rt_addrinfo *);
@@ -807,20 +872,6 @@
 #endif
 
 /*
- * The prefix structure contains information about one prefix
- * of an interface.  They are maintained by the different address families,
- * are allocated and attached when a prefix or an address is set,
- * and are linked together so all prefixes for an interface can be located.
- */
-struct ifprefix {
-	struct	sockaddr *ifpr_prefix;	/* prefix of interface */
-	struct	ifnet *ifpr_ifp;	/* back-pointer to interface */
-	TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
-	u_char	ifpr_plen;		/* prefix length in bits */
-	u_char	ifpr_type;		/* protocol dependent prefix type */
-};
-
-/*
  * Multicast address structure.  This is analogous to the ifaddr
  * structure except that it keeps track of multicast addresses.
  */
@@ -917,7 +968,6 @@
 struct ifmultiaddr *
 	if_findmulti(struct ifnet *, struct sockaddr *);
 void	if_free(struct ifnet *);
-void	if_free_type(struct ifnet *, u_char);
 void	if_initname(struct ifnet *, const char *, int);
 void	if_link_state_change(struct ifnet *, int);
 int	if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
@@ -925,6 +975,7 @@
 void	if_ref(struct ifnet *);
 void	if_rele(struct ifnet *);
 int	if_setlladdr(struct ifnet *, const u_char *, int);
+int	if_gethwaddr(struct ifnet *, struct ifreq *);
 void	if_up(struct ifnet *);
 int	ifioctl(struct socket *, u_long, caddr_t, struct thread *);
 int	ifpromisc(struct ifnet *, int);
@@ -941,11 +992,13 @@
 int		ifa_ifwithaddr_check(struct sockaddr *);
 struct	ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
 struct	ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
+struct	ifaddr *ifa_ifwithdstaddr_fib(struct sockaddr *, int);
 struct	ifaddr *ifa_ifwithnet(struct sockaddr *, int);
+struct	ifaddr *ifa_ifwithnet_fib(struct sockaddr *, int, int);
 struct	ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
 struct	ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
-
 struct	ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
+int	ifa_preferred(struct ifaddr *, struct ifaddr *);
 
 int	if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
 
@@ -953,6 +1006,8 @@
 typedef	void if_com_free_t(void *com, u_char type);
 void	if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
 void	if_deregister_com_alloc(u_char type);
+uint64_t if_get_counter_default(struct ifnet *, ift_counter);
+void	if_inc_counter(struct ifnet *, ift_counter, int64_t);
 
 #define IF_LLADDR(ifp)							\
     LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
@@ -965,6 +1020,10 @@
 int    ether_poll_deregister(struct ifnet *ifp);
 #endif /* DEVICE_POLLING */
 
+/* TSO */
+void if_hw_tsomax_common(struct ifnet *, struct ifnet_hw_tsomax *);
+int if_hw_tsomax_update(struct ifnet *, struct ifnet_hw_tsomax *);
+
 #endif /* _KERNEL */
 
 #endif /* !_NET_IF_VAR_H_ */

Modified: trunk/sys/net/if_vlan.c
===================================================================
--- trunk/sys/net/if_vlan.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_vlan.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_vlan.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_vlan.c 332160 2018-04-07 00:04:28Z brooks $");
 
 #include "opt_inet.h"
 #include "opt_vlan.h"
@@ -73,7 +73,6 @@
 #include <netinet/if_ether.h>
 #endif
 
-#define VLANNAME	"vlan"
 #define	VLAN_DEF_HWIDTH	4
 #define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)
 
@@ -121,7 +120,7 @@
 #endif
 };
 #define	ifv_proto	ifv_mib.ifvm_proto
-#define	ifv_tag		ifv_mib.ifvm_tag
+#define	ifv_vid		ifv_mib.ifvm_tag
 #define	ifv_encaplen	ifv_mib.ifvm_encaplen
 #define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
 #define	ifv_mintu	ifv_mib.ifvm_mintu
@@ -146,7 +145,8 @@
 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
 	   "pad short frames before tagging");
 
-static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");
+static const char vlanname[] = "vlan";
+static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
 
 static eventhandler_tag ifdetach_tag;
 static eventhandler_tag iflladdr_tag;
@@ -169,7 +169,7 @@
 #define	VLAN_LOCK_ASSERT()	sx_assert(&ifv_lock, SA_LOCKED)
 #define	VLAN_LOCK()		sx_xlock(&ifv_lock)
 #define	VLAN_UNLOCK()		sx_xunlock(&ifv_lock)
-#define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, VLANNAME)
+#define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, vlanname)
 #define	TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
 #define	TRUNK_LOCK(trunk)	rw_wlock(&(trunk)->rw)
 #define	TRUNK_UNLOCK(trunk)	rw_wunlock(&(trunk)->rw)
@@ -185,7 +185,7 @@
 static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
 static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
 static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
-	uint16_t tag);
+	uint16_t vid);
 #endif
 static	void trunk_destroy(struct ifvlantrunk *trunk);
 
@@ -205,7 +205,7 @@
 static	void vlan_capabilities(struct ifvlan *ifv);
 static	void vlan_trunk_capabilities(struct ifnet *ifp);
 
-static	struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
+static	struct ifnet *vlan_clone_match_ethervid(struct if_clone *,
     const char *, int *);
 static	int vlan_clone_match(struct if_clone *, const char *);
 static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
@@ -214,11 +214,10 @@
 static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
 static  void vlan_iflladdr(void *arg, struct ifnet *ifp);
 
-static	struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
-    IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
+static struct if_clone *vlan_cloner;
 
 #ifdef VIMAGE
-static VNET_DEFINE(struct if_clone, vlan_cloner);
+static VNET_DEFINE(struct if_clone *, vlan_cloner);
 #define	V_vlan_cloner	VNET(vlan_cloner)
 #endif
 
@@ -273,9 +272,9 @@
 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
 
 	b = 1 << trunk->hwidth;
-	i = HASH(ifv->ifv_tag, trunk->hmask);
+	i = HASH(ifv->ifv_vid, trunk->hmask);
 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
-		if (ifv->ifv_tag == ifv2->ifv_tag)
+		if (ifv->ifv_vid == ifv2->ifv_vid)
 			return (EEXIST);
 
 	/*
@@ -285,7 +284,7 @@
 	 */
 	if (trunk->refcnt > (b * b) / 2) {
 		vlan_growhash(trunk, 1);
-		i = HASH(ifv->ifv_tag, trunk->hmask);
+		i = HASH(ifv->ifv_vid, trunk->hmask);
 	}
 	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
 	trunk->refcnt++;
@@ -303,7 +302,7 @@
 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
 	
 	b = 1 << trunk->hwidth;
-	i = HASH(ifv->ifv_tag, trunk->hmask);
+	i = HASH(ifv->ifv_vid, trunk->hmask);
 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
 		if (ifv2 == ifv) {
 			trunk->refcnt--;
@@ -355,7 +354,7 @@
 	for (i = 0; i < n; i++)
 		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
 			LIST_REMOVE(ifv, ifv_list);
-			j = HASH(ifv->ifv_tag, n2 - 1);
+			j = HASH(ifv->ifv_vid, n2 - 1);
 			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
 		}
 	free(trunk->hash, M_VLAN);
@@ -369,14 +368,14 @@
 }
 
 static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
 {
 	struct ifvlan *ifv;
 
 	TRUNK_LOCK_RASSERT(trunk);
 
-	LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list)
-		if (ifv->ifv_tag == tag)
+	LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
+		if (ifv->ifv_vid == vid)
 			return (ifv);
 	return (NULL);
 }
@@ -400,10 +399,10 @@
 #else
 
 static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
 {
 
-	return trunk->vlans[tag];
+	return trunk->vlans[vid];
 }
 
 static __inline int
@@ -410,9 +409,9 @@
 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
 {
 
-	if (trunk->vlans[ifv->ifv_tag] != NULL)
+	if (trunk->vlans[ifv->ifv_vid] != NULL)
 		return EEXIST;
-	trunk->vlans[ifv->ifv_tag] = ifv;
+	trunk->vlans[ifv->ifv_vid] = ifv;
 	trunk->refcnt++;
 
 	return (0);
@@ -422,7 +421,7 @@
 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
 {
 
-	trunk->vlans[ifv->ifv_tag] = NULL;
+	trunk->vlans[ifv->ifv_vid] = NULL;
 	trunk->refcnt--;
 
 	return (0);
@@ -460,23 +459,19 @@
  * traffic that it doesn't really want, which ends up being discarded
  * later by the upper protocol layers. Unfortunately, there's no way
  * to avoid this: there really is only one physical interface.
- *
- * XXX: There is a possible race here if more than one thread is
- *      modifying the multicast state of the vlan interface at the same time.
  */
 static int
 vlan_setmulti(struct ifnet *ifp)
 {
 	struct ifnet		*ifp_p;
-	struct ifmultiaddr	*ifma, *rifma = NULL;
+	struct ifmultiaddr	*ifma;
 	struct ifvlan		*sc;
 	struct vlan_mc_entry	*mc;
 	int			error;
 
-	/*VLAN_LOCK_ASSERT();*/
-
 	/* Find the parent. */
 	sc = ifp->if_softc;
+	TRUNK_LOCK_ASSERT(TRUNK(sc));
 	ifp_p = PARENT(sc);
 
 	CURVNET_SET_QUIET(ifp_p->if_vnet);
@@ -483,25 +478,29 @@
 
 	/* First, remove any existing filter entries. */
 	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
-		error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
-		if (error)
-			return (error);
 		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
+		(void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
 		free(mc, M_VLAN);
 	}
 
 	/* Now program new ones. */
+	IF_ADDR_WLOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
-		if (mc == NULL)
+		if (mc == NULL) {
+			IF_ADDR_WUNLOCK(ifp);
 			return (ENOMEM);
+		}
 		bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
 		mc->mc_addr.sdl_index = ifp_p->if_index;
 		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
+	}
+	IF_ADDR_WUNLOCK(ifp);
+	SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
 		error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
-		    &rifma);
+		    NULL);
 		if (error)
 			return (error);
 	}
@@ -624,10 +623,14 @@
 }
 
 /*
- * Return the 16bit vlan tag for this interface.
+ * Return the 12-bit VLAN VID for this interface, for use by external
+ * components such as Infiniband.
+ *
+ * XXXRW: Note that the function name here is historical; it should be named
+ * vlan_vid().
  */
 static int
-vlan_tag(struct ifnet *ifp, uint16_t *tagp)
+vlan_tag(struct ifnet *ifp, uint16_t *vidp)
 {
 	struct ifvlan *ifv;
 
@@ -634,7 +637,7 @@
 	if (ifp->if_type != IFT_L2VLAN)
 		return (EINVAL);
 	ifv = ifp->if_softc;
-	*tagp = ifv->ifv_tag;
+	*vidp = ifv->ifv_vid;
 	return (0);
 }
 
@@ -670,10 +673,10 @@
 }
 
 /*
- * Return the vlan device present at the specific tag.
+ * Return the vlan device present at the specific VID.
  */
 static struct ifnet *
-vlan_devat(struct ifnet *ifp, uint16_t tag)
+vlan_devat(struct ifnet *ifp, uint16_t vid)
 {
 	struct ifvlantrunk *trunk;
 	struct ifvlan *ifv;
@@ -683,7 +686,7 @@
 		return (NULL);
 	ifp = NULL;
 	TRUNK_RLOCK(trunk);
-	ifv = vlan_gethash(trunk, tag);
+	ifv = vlan_gethash(trunk, vid);
 	if (ifv)
 		ifp = ifv->ifv_ifp;
 	TRUNK_RUNLOCK(trunk);
@@ -726,7 +729,8 @@
 		vlan_tag_p = vlan_tag;
 		vlan_devat_p = vlan_devat;
 #ifndef VIMAGE
-		if_clone_attach(&vlan_cloner);
+		vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+		    vlan_clone_create, vlan_clone_destroy);
 #endif
 		if (bootverbose)
 			printf("vlan: initialized, using "
@@ -740,7 +744,7 @@
 		break;
 	case MOD_UNLOAD:
 #ifndef VIMAGE
-		if_clone_detach(&vlan_cloner);
+		if_clone_detach(vlan_cloner);
 #endif
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
@@ -749,8 +753,8 @@
 		vlan_trunk_cap_p = NULL;
 		vlan_trunkdev_p = NULL;
 		vlan_tag_p = NULL;
-		vlan_cookie_p = vlan_cookie;
-		vlan_setcookie_p = vlan_setcookie;
+		vlan_cookie_p = NULL;
+		vlan_setcookie_p = NULL;
 		vlan_devat_p = NULL;
 		VLAN_LOCK_DESTROY();
 		if (bootverbose)
@@ -776,8 +780,9 @@
 vnet_vlan_init(const void *unused __unused)
 {
 
+	vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+		    vlan_clone_create, vlan_clone_destroy);
 	V_vlan_cloner = vlan_cloner;
-	if_clone_attach(&V_vlan_cloner);
 }
 VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_vlan_init, NULL);
@@ -786,7 +791,7 @@
 vnet_vlan_uninit(const void *unused __unused)
 {
 
-	if_clone_detach(&V_vlan_cloner);
+	if_clone_detach(V_vlan_cloner);
 }
 VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
     vnet_vlan_uninit, NULL);
@@ -793,11 +798,11 @@
 #endif
 
 static struct ifnet *
-vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
+vlan_clone_match_ethervid(struct if_clone *ifc, const char *name, int *vidp)
 {
 	const char *cp;
 	struct ifnet *ifp;
-	int t;
+	int vid;
 
 	/* Check for <etherif>.<vlan> style interface names. */
 	IFNET_RLOCK_NOSLEEP();
@@ -816,13 +821,13 @@
 			continue;
 		if (*cp == '\0')
 			continue;
-		t = 0;
+		vid = 0;
 		for(; *cp >= '0' && *cp <= '9'; cp++)
-			t = (t * 10) + (*cp - '0');
+			vid = (vid * 10) + (*cp - '0');
 		if (*cp != '\0')
 			continue;
-		if (tag != NULL)
-			*tag = t;
+		if (vidp != NULL)
+			*vidp = vid;
 		break;
 	}
 	IFNET_RUNLOCK_NOSLEEP();
@@ -835,10 +840,10 @@
 {
 	const char *cp;
 
-	if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL)
+	if (vlan_clone_match_ethervid(ifc, name, NULL) != NULL)
 		return (1);
 
-	if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0)
+	if (strncmp(vlanname, name, strlen(vlanname)) != 0)
 		return (0);
 	for (cp = name + 4; *cp != '\0'; cp++) {
 		if (*cp < '0' || *cp > '9')
@@ -855,7 +860,7 @@
 	int wildcard;
 	int unit;
 	int error;
-	int tag;
+	int vid;
 	int ethertag;
 	struct ifvlan *ifv;
 	struct ifnet *ifp;
@@ -873,6 +878,9 @@
 	 *   must be configured separately.
 	 * The first technique is preferred; the latter two are
 	 * supported for backwards compatibilty.
+	 *
+	 * XXXRW: Note historic use of the word "tag" here.  New ioctls may be
+	 * called for.
 	 */
 	if (params) {
 		error = copyin(params, &vlr, sizeof(vlr));
@@ -882,7 +890,7 @@
 		if (p == NULL)
 			return ENXIO;
 		/*
-		 * Don't let the caller set up a VLAN tag with
+		 * Don't let the caller set up a VLAN VID with
 		 * anything except VLID bits.
 		 */
 		if (vlr.vlr_tag & ~EVL_VLID_MASK)
@@ -892,18 +900,18 @@
 			return (error);
 
 		ethertag = 1;
-		tag = vlr.vlr_tag;
+		vid = vlr.vlr_tag;
 		wildcard = (unit < 0);
-	} else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
+	} else if ((p = vlan_clone_match_ethervid(ifc, name, &vid)) != NULL) {
 		ethertag = 1;
 		unit = -1;
 		wildcard = 0;
 
 		/*
-		 * Don't let the caller set up a VLAN tag with
+		 * Don't let the caller set up a VLAN VID with
 		 * anything except VLID bits.
 		 */
-		if (tag & ~EVL_VLID_MASK)
+		if (vid & ~EVL_VLID_MASK)
 			return (EINVAL);
 	} else {
 		ethertag = 0;
@@ -943,7 +951,7 @@
 	 * we don't conform to the default naming convention for interfaces.
 	 */
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
-	ifp->if_dname = ifc->ifc_name;
+	ifp->if_dname = vlanname;
 	ifp->if_dunit = unit;
 	/* NB: flags are not set here */
 	ifp->if_linkmib = &ifv->ifv_mib;
@@ -965,7 +973,7 @@
 	sdl->sdl_type = IFT_L2VLAN;
 
 	if (ethertag) {
-		error = vlan_config(ifv, p, tag);
+		error = vlan_config(ifv, p, vid);
 		if (error != 0) {
 			/*
 			 * Since we've partially failed, we need to back
@@ -974,7 +982,7 @@
 			 */
 			ether_ifdetach(ifp);
 			vlan_unconfig(ifp);
-			if_free_type(ifp, IFT_ETHER);
+			if_free(ifp);
 			ifc_free_unit(ifc, unit);
 			free(ifv, M_VLAN);
 
@@ -996,7 +1004,7 @@
 
 	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
 	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
-	if_free_type(ifp, IFT_ETHER);
+	if_free(ifp);
 	free(ifv, M_VLAN);
 	ifc_free_unit(ifc, unit);
 
@@ -1035,7 +1043,7 @@
 	if (!UP_AND_RUNNING(p)) {
 		m_freem(m);
 		ifp->if_oerrors++;
-		return (0);
+		return (ENETDOWN);
 	}
 
 	/*
@@ -1075,10 +1083,10 @@
 	 * packet tag that holds it.
 	 */
 	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
-		m->m_pkthdr.ether_vtag = ifv->ifv_tag;
+		m->m_pkthdr.ether_vtag = ifv->ifv_vid;
 		m->m_flags |= M_VLANTAG;
 	} else {
-		m = ether_vlanencap(m, ifv->ifv_tag);
+		m = ether_vlanencap(m, ifv->ifv_vid);
 		if (m == NULL) {
 			if_printf(ifp, "unable to prepend VLAN header\n");
 			ifp->if_oerrors++;
@@ -1112,7 +1120,7 @@
 {
 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
 	struct ifvlan *ifv;
-	uint16_t tag;
+	uint16_t vid;
 
 	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
 
@@ -1121,7 +1129,7 @@
 		 * Packet is tagged, but m contains a normal
 		 * Ethernet frame; the tag is stored out-of-band.
 		 */
-		tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
+		vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
 		m->m_flags &= ~M_VLANTAG;
 	} else {
 		struct ether_vlan_header *evl;
@@ -1137,7 +1145,7 @@
 				return;
 			}
 			evl = mtod(m, struct ether_vlan_header *);
-			tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
+			vid = EVL_VLANOFTAG(ntohs(evl->evl_tag));
 
 			/*
 			 * Remove the 802.1q header by copying the Ethernet
@@ -1162,7 +1170,7 @@
 	}
 
 	TRUNK_RLOCK(trunk);
-	ifv = vlan_gethash(trunk, tag);
+	ifv = vlan_gethash(trunk, vid);
 	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
 		TRUNK_RUNLOCK(trunk);
 		m_freem(m);
@@ -1179,7 +1187,7 @@
 }
 
 static int
-vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
+vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
 {
 	struct ifvlantrunk *trunk;
 	struct ifnet *ifp;
@@ -1186,7 +1194,7 @@
 	int error = 0;
 
 	/* VID numbers 0x0 and 0xFFF are reserved */
-	if (tag == 0 || tag == 0xFFF)
+	if (vid == 0 || vid == 0xFFF)
 		return (EINVAL);
 	if (p->if_type != IFT_ETHER &&
 	    (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
@@ -1218,7 +1226,7 @@
 		TRUNK_LOCK(trunk);
 	}
 
-	ifv->ifv_tag = tag;	/* must set this before vlan_inshash() */
+	ifv->ifv_vid = vid;	/* must set this before vlan_inshash() */
 	error = vlan_inshash(trunk, ifv);
 	if (error)
 		goto done;
@@ -1296,7 +1304,7 @@
 done:
 	TRUNK_UNLOCK(trunk);
 	if (error == 0)
-		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag);
+		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
 	VLAN_UNLOCK();
 
 	return (error);
@@ -1365,7 +1373,7 @@
 		 * Check if we were the last.
 		 */
 		if (trunk->refcnt == 0) {
-			trunk->parent->if_vlantrunk = NULL;
+			parent->if_vlantrunk = NULL;
 			/*
 			 * XXXGL: If some ithread has already entered
 			 * vlan_input() and is now blocked on the trunk
@@ -1392,7 +1400,7 @@
 	 * to cleanup anyway.
 	 */
 	if (parent != NULL)
-		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag);
+		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
 }
 
 /* Handle a reference counted flag that should be set on the parent as well */
@@ -1476,6 +1484,7 @@
 {
 	struct ifnet *p = PARENT(ifv);
 	struct ifnet *ifp = ifv->ifv_ifp;
+	struct ifnet_hw_tsomax hw_tsomax;
 
 	TRUNK_LOCK_ASSERT(TRUNK(ifv));
 
@@ -1486,13 +1495,16 @@
 	 * offloading requires hardware VLAN tagging.
 	 */
 	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
-		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
+		ifp->if_capabilities =
+		    p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
 
 	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
 	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
-		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
+		ifp->if_capenable =
+		    p->if_capenable & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
 		ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
-		    CSUM_UDP | CSUM_SCTP | CSUM_IP_FRAGS | CSUM_FRAGMENT);
+		    CSUM_UDP | CSUM_SCTP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
+		    CSUM_SCTP_IPV6);
 	} else {
 		ifp->if_capenable = 0;
 		ifp->if_hwassist = 0;
@@ -1502,6 +1514,9 @@
 	 * propagate the hardware-assisted flag. TSO on VLANs
 	 * does not necessarily require hardware VLAN tagging.
 	 */
+	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
+	if_hw_tsomax_common(p, &hw_tsomax);
+	if_hw_tsomax_update(ifp, &hw_tsomax);
 	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
 	if (p->if_capenable & IFCAP_VLAN_HWTSO) {
@@ -1511,6 +1526,22 @@
 		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
 		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
 	}
+
+	/*
+	 * If the parent interface can offload TCP connections over VLANs then
+	 * propagate its TOE capability to the VLAN interface.
+	 *
+	 * All TOE drivers in the tree today can deal with VLANs.  If this
+	 * changes then IFCAP_VLAN_TOE should be promoted to a full capability
+	 * with its own bit.
+	 */
+#define	IFCAP_VLAN_TOE IFCAP_TOE
+	if (p->if_capabilities & IFCAP_VLAN_TOE)
+		ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
+	if (p->if_capenable & IFCAP_VLAN_TOE) {
+		TOEDEV(ifp) = TOEDEV(p);
+		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
+	}
 }
 
 static void
@@ -1541,6 +1572,7 @@
 	struct ifreq *ifr;
 	struct ifaddr *ifa;
 	struct ifvlan *ifv;
+	struct ifvlantrunk *trunk;
 	struct vlanreq vlr;
 	int error = 0;
 
@@ -1557,12 +1589,8 @@
 #endif
 		break;
 	case SIOCGIFADDR:
-                {
-			struct sockaddr *sa;
-
-			sa = (struct sockaddr *)&ifr->ifr_data;
-			bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
-                }
+		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+		    ifp->if_addrlen);
 		break;
 	case SIOCGIFMEDIA:
 		VLAN_LOCK();
@@ -1612,6 +1640,13 @@
 
 	case SIOCSETVLAN:
 #ifdef VIMAGE
+		/*
+		 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
+		 * interface to be delegated to a jail without allowing the
+		 * jail to change what underlying interface/VID it is
+		 * associated with.  We are not entirely convinced that this
+		 * is the right way to accomplish that policy goal.
+		 */
 		if (ifp->if_vnet != ifp->if_home_vnet) {
 			error = EPERM;
 			break;
@@ -1630,7 +1665,7 @@
 			break;
 		}
 		/*
-		 * Don't let the caller set up a VLAN tag with
+		 * Don't let the caller set up a VLAN VID with
 		 * anything except VLID bits.
 		 */
 		if (vlr.vlr_tag & ~EVL_VLID_MASK) {
@@ -1657,7 +1692,7 @@
 		if (TRUNK(ifv) != NULL) {
 			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
 			    sizeof(vlr.vlr_parent));
-			vlr.vlr_tag = ifv->ifv_tag;
+			vlr.vlr_tag = ifv->ifv_vid;
 		}
 		VLAN_UNLOCK();
 		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
@@ -1678,8 +1713,12 @@
 		 * If we don't have a parent, just remember the membership for
 		 * when we do.
 		 */
-		if (TRUNK(ifv) != NULL)
+		trunk = TRUNK(ifv);
+		if (trunk != NULL) {
+			TRUNK_LOCK(trunk);
 			error = vlan_setmulti(ifp);
+			TRUNK_UNLOCK(trunk);
+		}
 		break;
 
 	default:

Modified: trunk/sys/net/if_vlan_var.h
===================================================================
--- trunk/sys/net/if_vlan_var.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_vlan_var.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/if_vlan_var.h 219819 2011-03-21 09:40:01Z jeff $
+ * $FreeBSD: stable/10/sys/net/if_vlan_var.h 326512 2017-12-04 09:27:36Z hselasky $
  */
 
 #ifndef _NET_IF_VLAN_VAR_H_
@@ -109,7 +109,7 @@
  * received VLAN tag (containing both vlan and priority information)
  * into the ether_vtag mbuf packet header field:
  * 
- *	m->m_pkthdr.ether_vtag = vlan_id;	// ntohs()?
+ *	m->m_pkthdr.ether_vtag = vtag;		// ntohs()?
  *	m->m_flags |= M_VLANTAG;
  *
  * to mark the packet m with the specified VLAN tag.
@@ -133,16 +133,16 @@
 } while (0)
 
 #define	VLAN_TRUNKDEV(_ifp)					\
-	(_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL
-#define	VLAN_TAG(_ifp, _tag)					\
-	(_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_tag)) : EINVAL
+	((_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL)
+#define	VLAN_TAG(_ifp, _vid)					\
+	((_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL)
 #define	VLAN_COOKIE(_ifp)					\
-	(_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL
+	((_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL)
 #define	VLAN_SETCOOKIE(_ifp, _cookie)				\
-	(_ifp)->if_type == IFT_L2VLAN ?				\
-	    (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL
-#define	VLAN_DEVAT(_ifp, _tag)					\
-	(_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_tag)) : NULL
+	((_ifp)->if_type == IFT_L2VLAN ?			\
+	    (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL)
+#define	VLAN_DEVAT(_ifp, _vid)					\
+	((_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_vid)) : NULL)
 
 extern	void (*vlan_trunk_cap_p)(struct ifnet *);
 extern	struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);

Added: trunk/sys/net/if_vxlan.c
===================================================================
--- trunk/sys/net/if_vxlan.c	                        (rev 0)
+++ trunk/sys/net/if_vxlan.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,3090 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014, Bryan Venteicher <bryanv at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_vxlan.c 327142 2017-12-24 02:06:16Z ae $");
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/refcount.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_vxlan.h>
+#include <net/netisr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+struct vxlan_softc;
+LIST_HEAD(vxlan_softc_head, vxlan_softc);
+
+struct vxlan_socket_mc_info {
+	union vxlan_sockaddr		 vxlsomc_saddr;
+	union vxlan_sockaddr		 vxlsomc_gaddr;
+	int				 vxlsomc_ifidx;
+	int				 vxlsomc_users;
+};
+
+#define VXLAN_SO_MC_MAX_GROUPS		32
+
+#define VXLAN_SO_VNI_HASH_SHIFT		6
+#define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
+#define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)
+
+struct vxlan_socket {
+	struct socket			*vxlso_sock;
+	struct rmlock			 vxlso_lock;
+	u_int				 vxlso_refcnt;
+	union vxlan_sockaddr		 vxlso_laddr;
+	LIST_ENTRY(vxlan_socket)	 vxlso_entry;
+	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
+	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
+};
+
+#define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
+#define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
+#define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
+#define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
+#define VXLAN_SO_LOCK_ASSERT(_vso) \
+    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
+#define VXLAN_SO_LOCK_WASSERT(_vso) \
+    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
+
+#define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
+#define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)
+
+struct vxlan_ftable_entry {
+	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;
+	uint16_t			 vxlfe_flags;
+	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];
+	union vxlan_sockaddr		 vxlfe_raddr;
+	time_t				 vxlfe_expire;
+};
+
+#define VXLAN_FE_FLAG_DYNAMIC		0x01
+#define VXLAN_FE_FLAG_STATIC		0x02
+
+#define VXLAN_FE_IS_DYNAMIC(_fe) \
+    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
+
+#define VXLAN_SC_FTABLE_SHIFT		9
+#define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
+#define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
+#define VXLAN_SC_FTABLE_HASH(_sc, _mac)	\
+    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
+
+LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
+
+struct vxlan_statistics {
+	uint32_t	ftable_nospace;
+	uint32_t	ftable_lock_upgrade_failed;
+};
+
+struct vxlan_softc {
+	struct ifnet			*vxl_ifp;
+	struct vxlan_socket		*vxl_sock;
+	uint32_t			 vxl_vni;
+	union vxlan_sockaddr		 vxl_src_addr;
+	union vxlan_sockaddr		 vxl_dst_addr;
+	uint32_t			 vxl_flags;
+#define VXLAN_FLAG_INIT		0x0001
+#define VXLAN_FLAG_TEARDOWN	0x0002
+#define VXLAN_FLAG_LEARN	0x0004
+
+	uint32_t			 vxl_port_hash_key;
+	uint16_t			 vxl_min_port;
+	uint16_t			 vxl_max_port;
+	uint8_t				 vxl_ttl;
+
+	/* Lookup table from MAC address to forwarding entry. */
+	uint32_t			 vxl_ftable_cnt;
+	uint32_t			 vxl_ftable_max;
+	uint32_t			 vxl_ftable_timeout;
+	uint32_t			 vxl_ftable_hash_key;
+	struct vxlan_ftable_head	*vxl_ftable;
+
+	/* Derived from vxl_dst_addr. */
+	struct vxlan_ftable_entry	 vxl_default_fe;
+
+	struct ip_moptions		*vxl_im4o;
+	struct ip6_moptions		*vxl_im6o;
+
+	struct rmlock			 vxl_lock;
+	volatile u_int			 vxl_refcnt;
+
+	int				 vxl_unit;
+	int				 vxl_vso_mc_index;
+	struct vxlan_statistics		 vxl_stats;
+	struct sysctl_oid		*vxl_sysctl_node;
+	struct sysctl_ctx_list		 vxl_sysctl_ctx;
+	struct callout			 vxl_callout;
+	uint8_t				 vxl_hwaddr[ETHER_ADDR_LEN];
+	int				 vxl_mc_ifindex;
+	struct ifnet			*vxl_mc_ifp;
+	char				 vxl_mc_ifname[IFNAMSIZ];
+	LIST_ENTRY(vxlan_softc)		 vxl_entry;
+	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;
+};
+
+#define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
+#define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
+#define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
+#define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
+#define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
+#define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
+#define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
+#define VXLAN_UNLOCK(_sc, _p) do {		\
+    if (VXLAN_LOCK_WOWNED(_sc))			\
+	VXLAN_WUNLOCK(_sc);			\
+    else					\
+	VXLAN_RUNLOCK(_sc, _p);			\
+} while (0)
+
+#define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
+#define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)
+
+#define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
+#define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))
+
+struct vxlanudphdr {
+	struct udphdr		vxlh_udp;
+	struct vxlan_header	vxlh_hdr;
+} __packed;
+
+static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
+static void	vxlan_ftable_init(struct vxlan_softc *);
+static void	vxlan_ftable_fini(struct vxlan_softc *);
+static void	vxlan_ftable_flush(struct vxlan_softc *, int);
+static void	vxlan_ftable_expire(struct vxlan_softc *);
+static int	vxlan_ftable_update_locked(struct vxlan_softc *,
+		    const struct sockaddr *, const uint8_t *,
+		    struct rm_priotracker *);
+static int	vxlan_ftable_update(struct vxlan_softc *,
+		    const struct sockaddr *, const uint8_t *);
+static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
+
+static struct vxlan_ftable_entry *
+		vxlan_ftable_entry_alloc(void);
+static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
+static void	vxlan_ftable_entry_init(struct vxlan_softc *,
+		    struct vxlan_ftable_entry *, const uint8_t *,
+		    const struct sockaddr *, uint32_t);
+static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
+		    struct vxlan_ftable_entry *);
+static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
+		    struct vxlan_ftable_entry *);
+static struct vxlan_ftable_entry *
+		vxlan_ftable_entry_lookup(struct vxlan_softc *,
+		    const uint8_t *);
+static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
+		    struct sbuf *);
+
+static struct vxlan_socket *
+		vxlan_socket_alloc(const union vxlan_sockaddr *);
+static void	vxlan_socket_destroy(struct vxlan_socket *);
+static void	vxlan_socket_release(struct vxlan_socket *);
+static struct vxlan_socket *
+		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
+static void	vxlan_socket_insert(struct vxlan_socket *);
+static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
+static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
+static int	vxlan_socket_create(struct ifnet *, int,
+		    const union vxlan_sockaddr *, struct vxlan_socket **);
+static void	vxlan_socket_ifdetach(struct vxlan_socket *,
+		    struct ifnet *, struct vxlan_softc_head *);
+
+static struct vxlan_socket *
+		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
+static int	vxlan_sockaddr_mc_info_match(
+		    const struct vxlan_socket_mc_info *,
+		    const union vxlan_sockaddr *,
+		    const union vxlan_sockaddr *, int);
+static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
+		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
+		    int *, union vxlan_sockaddr *);
+static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
+		    const union vxlan_sockaddr *,
+		    const union vxlan_sockaddr *, int);
+static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
+		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
+		    int, int *);
+static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
+		    int);
+
+static struct vxlan_softc *
+		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
+		    uint32_t);
+static struct vxlan_softc *
+		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
+static int	vxlan_socket_insert_softc(struct vxlan_socket *,
+		    struct vxlan_softc *);
+static void	vxlan_socket_remove_softc(struct vxlan_socket *,
+		    struct vxlan_softc *);
+
+static struct ifnet *
+		vxlan_multicast_if_ref(struct vxlan_softc *, int);
+static void	vxlan_free_multicast(struct vxlan_softc *);
+static int	vxlan_setup_multicast_interface(struct vxlan_softc *);
+
+static int	vxlan_setup_multicast(struct vxlan_softc *);
+static int	vxlan_setup_socket(struct vxlan_softc *);
+static void	vxlan_setup_interface(struct vxlan_softc *);
+static int	vxlan_valid_init_config(struct vxlan_softc *);
+static void	vxlan_init_wait(struct vxlan_softc *);
+static void	vxlan_init_complete(struct vxlan_softc *);
+static void	vxlan_init(void *);
+static void	vxlan_release(struct vxlan_softc *);
+static void	vxlan_teardown_wait(struct vxlan_softc *);
+static void	vxlan_teardown_complete(struct vxlan_softc *);
+static void	vxlan_teardown_locked(struct vxlan_softc *);
+static void	vxlan_teardown(struct vxlan_softc *);
+static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
+		    struct vxlan_softc_head *);
+static void	vxlan_timer(void *);
+
+static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
+static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
+static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
+static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
+		    struct ifdrv *, int);
+static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
+static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
+
+#if defined(INET) || defined(INET6)
+static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
+static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
+		    int, uint16_t, uint16_t);
+#endif
+static int	vxlan_encap4(struct vxlan_softc *,
+		    const union vxlan_sockaddr *, struct mbuf *);
+static int	vxlan_encap6(struct vxlan_softc *,
+		    const union vxlan_sockaddr *, struct mbuf *);
+static int	vxlan_transmit(struct ifnet *, struct mbuf *);
+static void	vxlan_qflush(struct ifnet *);
+static void	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
+		    const struct sockaddr *, void *);
+static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
+		    const struct sockaddr *);
+
+static void	vxlan_set_default_config(struct vxlan_softc *);
+static int	vxlan_set_user_config(struct vxlan_softc *,
+		     struct ifvxlanparam *);
+static int	vxlan_clone_create(struct if_clone *, int, caddr_t);
+static void	vxlan_clone_destroy(struct ifnet *);
+
+static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
+static void	vxlan_fakeaddr(struct vxlan_softc *);
+
+static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
+		    const struct sockaddr *);
+static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
+		    const struct sockaddr *);
+static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
+		    const struct sockaddr *);
+static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
+		    const struct sockaddr *);
+static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
+static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
+static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
+
+static int	vxlan_can_change_config(struct vxlan_softc *);
+static int	vxlan_check_vni(uint32_t);
+static int	vxlan_check_ttl(int);
+static int	vxlan_check_ftable_timeout(uint32_t);
+static int	vxlan_check_ftable_max(uint32_t);
+
+static void	vxlan_sysctl_setup(struct vxlan_softc *);
+static void	vxlan_sysctl_destroy(struct vxlan_softc *);
+static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);
+
+static void	vxlan_ifdetach_event(void *, struct ifnet *);
+static void	vxlan_load(void);
+static void	vxlan_unload(void);
+static int	vxlan_modevent(module_t, int, void *);
+
+static const char vxlan_name[] = "vxlan";
+static MALLOC_DEFINE(M_VXLAN, vxlan_name,
+    "Virtual eXtensible LAN Interface");
+static struct if_clone *vxlan_cloner;
+static struct mtx vxlan_list_mtx;
+static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
+
+static eventhandler_tag vxlan_ifdetach_event_tag;
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0,
+    "Virtual eXtensible Local Area Network");
+
+static int vxlan_legacy_port = 0;
+TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
+static int vxlan_reuse_port = 0;
+TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
+
+/* Default maximum number of addresses in the forwarding table. */
+#ifndef VXLAN_FTABLE_MAX
+#define VXLAN_FTABLE_MAX	2000
+#endif
+
+/* Timeout (in seconds) of addresses learned in the forwarding table. */
+#ifndef VXLAN_FTABLE_TIMEOUT
+#define VXLAN_FTABLE_TIMEOUT	(20 * 60)
+#endif
+
+/*
+ * Maximum timeout (in seconds) of addresses learned in the forwarding
+ * table.
+ */
+#ifndef VXLAN_FTABLE_MAX_TIMEOUT
+#define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
+#endif
+
+/* Number of seconds between pruning attempts of the forwarding table. */
+#ifndef VXLAN_FTABLE_PRUNE
+#define VXLAN_FTABLE_PRUNE	(5 * 60)
+#endif
+
+static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
+
+struct vxlan_control {
+	int	(*vxlc_func)(struct vxlan_softc *, void *);
+	int	vxlc_argsize;
+	int	vxlc_flags;
+#define VXLAN_CTRL_FLAG_COPYIN	0x01
+#define VXLAN_CTRL_FLAG_COPYOUT	0x02
+#define VXLAN_CTRL_FLAG_SUSER	0x04
+};
+
+/*
+ * Control command dispatch table, indexed directly by the VXLAN_CMD_*
+ * value via designated initializers.  Unlisted command numbers are
+ * zero-filled (NULL handler).  All SET/ADD/REM commands require
+ * super-user privilege and copy a struct ifvxlancmd in.
+ */
+static const struct vxlan_control vxlan_control_table[] = {
+	[VXLAN_CMD_GET_CONFIG] =
+	    {	vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
+		VXLAN_CTRL_FLAG_COPYOUT
+	    },
+
+	[VXLAN_CMD_SET_VNI] =
+	    {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_LOCAL_ADDR] =
+	    {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_REMOTE_ADDR] =
+	    {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_LOCAL_PORT] =
+	    {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_REMOTE_PORT] =
+	    {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_PORT_RANGE] =
+	    {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
+	    {	vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_FTABLE_MAX] =
+	    {	vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_MULTICAST_IF] =
+	    {	vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_TTL] =
+	    {	vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_SET_LEARN] =
+	    {	vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
+	    {	vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_FTABLE_ENTRY_REM] =
+	    {	vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+
+	[VXLAN_CMD_FLUSH] =
+	    {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
+		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+	    },
+};
+
+/* Number of table slots, used to bounds-check incoming command numbers. */
+static const int vxlan_control_table_size = nitems(vxlan_control_table);
+
+/*
+ * Lexicographically compare two Ethernet MAC addresses octet by octet.
+ * Returns a negative value, zero, or a positive value (memcmp-style)
+ * based on the first differing octet.
+ */
+static int
+vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+	int idx;
+
+	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
+		if (a[idx] != b[idx])
+			return (((int)a[idx]) - ((int)b[idx]));
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize the softc's forwarding-table hash buckets and
+ * pick a random hash key.  May sleep (M_WAITOK).
+ */
+static void
+vxlan_ftable_init(struct vxlan_softc *sc)
+{
+	int i;
+
+	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
+	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
+
+	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
+		LIST_INIT(&sc->vxl_ftable[i]);
+	/* Randomized key makes the bucket distribution unpredictable. */
+	sc->vxl_ftable_hash_key = arc4random();
+}
+
+/*
+ * Release the forwarding-table buckets.  All entries must have been
+ * removed already; the asserts enforce that invariant.
+ */
+static void
+vxlan_ftable_fini(struct vxlan_softc *sc)
+{
+	int i;
+
+	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
+		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
+	}
+	MPASS(sc->vxl_ftable_cnt == 0);
+
+	free(sc->vxl_ftable, M_VXLAN);
+	sc->vxl_ftable = NULL;
+}
+
+/*
+ * Remove forwarding-table entries: every entry when 'all' is non-zero,
+ * otherwise only dynamically learned ones (static entries survive).
+ */
+static void
+vxlan_ftable_flush(struct vxlan_softc *sc, int all)
+{
+	struct vxlan_ftable_entry *fe, *tfe;
+	int i;
+
+	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
+			if (all || VXLAN_FE_IS_DYNAMIC(fe))
+				vxlan_ftable_entry_destroy(sc, fe);
+		}
+	}
+}
+
+/*
+ * Prune dynamically learned entries whose expiration time has passed.
+ * Called from the periodic timer with the write lock held.
+ */
+static void
+vxlan_ftable_expire(struct vxlan_softc *sc)
+{
+	struct vxlan_ftable_entry *fe, *tfe;
+	int i;
+
+	VXLAN_LOCK_WASSERT(sc);
+
+	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
+			if (VXLAN_FE_IS_DYNAMIC(fe) &&
+			    time_uptime >= fe->vxlfe_expire)
+				vxlan_ftable_entry_destroy(sc, fe);
+		}
+	}
+}
+
+/*
+ * Learn or refresh the forwarding-table entry mapping 'mac' to the
+ * remote endpoint 'sa'.  Called with the softc lock held for reading or
+ * writing; when a modification is needed under a read lock, the lock is
+ * dropped, reacquired for writing, and the lookup retried from scratch.
+ * Returns 0 on success, ENOSPC when the table is full, or ENOMEM.
+ */
+static int
+vxlan_ftable_update_locked(struct vxlan_softc *sc, const struct sockaddr *sa,
+    const uint8_t *mac, struct rm_priotracker *tracker)
+{
+	union vxlan_sockaddr vxlsa;
+	struct vxlan_ftable_entry *fe;
+	int error;
+
+	VXLAN_LOCK_ASSERT(sc);
+
+again:
+	/*
+	 * A forwarding entry for this MAC address might already exist. If
+	 * so, update it, otherwise create a new one. We may have to upgrade
+	 * the lock if we have to change or create an entry.
+	 */
+	fe = vxlan_ftable_entry_lookup(sc, mac);
+	if (fe != NULL) {
+		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
+
+		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
+		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, sa))
+			return (0);
+		if (!VXLAN_LOCK_WOWNED(sc)) {
+			/* "Upgrade" by dropping, relocking and retrying. */
+			VXLAN_RUNLOCK(sc, tracker);
+			VXLAN_WLOCK(sc);
+			sc->vxl_stats.ftable_lock_upgrade_failed++;
+			goto again;
+		}
+		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, sa);
+		return (0);
+	}
+
+	if (!VXLAN_LOCK_WOWNED(sc)) {
+		VXLAN_RUNLOCK(sc, tracker);
+		VXLAN_WLOCK(sc);
+		sc->vxl_stats.ftable_lock_upgrade_failed++;
+		goto again;
+	}
+
+	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
+		sc->vxl_stats.ftable_nospace++;
+		return (ENOSPC);
+	}
+
+	fe = vxlan_ftable_entry_alloc();
+	if (fe == NULL)
+		return (ENOMEM);
+
+	/*
+	 * The source port may be randomly selected by the remote host, so
+	 * use the port of the default destination address.
+	 */
+	vxlan_sockaddr_copy(&vxlsa, sa);
+	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
+
+	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa.sa,
+	    VXLAN_FE_FLAG_DYNAMIC);
+
+	/* The prior lookup failed, so the insert should not. */
+	error = vxlan_ftable_entry_insert(sc, fe);
+	MPASS(error == 0);
+
+	return (0);
+}
+
+/*
+ * Locking wrapper for vxlan_ftable_update_locked(): take the read lock
+ * and let the locked variant upgrade it if needed.
+ */
+static int
+vxlan_ftable_update(struct vxlan_softc *sc, const struct sockaddr *sa,
+    const uint8_t *mac)
+{
+	struct rm_priotracker tracker;
+	int error;
+
+	VXLAN_RLOCK(sc, &tracker);
+	error = vxlan_ftable_update_locked(sc, sa, mac, &tracker);
+	VXLAN_UNLOCK(sc, &tracker);
+
+	return (error);
+}
+
+/*
+ * Sysctl handler that renders the forwarding table into a fixed-size
+ * (one page) sbuf and returns it as a string.  Output is best-effort:
+ * entries that do not fit are silently dropped (see sbuf_error checks).
+ */
+static int
+vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
+{
+	struct rm_priotracker tracker;
+	struct sbuf sb;
+	struct vxlan_softc *sc;
+	struct vxlan_ftable_entry *fe;
+	size_t size;
+	int i, error;
+
+	/*
+	 * This is mostly intended for debugging during development. It is
+	 * not practical to dump an entire large table this way.
+	 */
+
+	sc = arg1;
+	size = PAGE_SIZE;	/* Calculate later. */
+
+	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
+	sbuf_putc(&sb, '\n');
+
+	VXLAN_RLOCK(sc, &tracker);
+	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
+			if (sbuf_error(&sb) != 0)
+				break;
+			vxlan_ftable_entry_dump(fe, &sb);
+		}
+	}
+	VXLAN_RUNLOCK(sc, &tracker);
+
+	/* Only the leading newline was written: return an empty string. */
+	if (sbuf_len(&sb) == 1)
+		sbuf_setpos(&sb, 0);
+
+	sbuf_finish(&sb);
+	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+	sbuf_delete(&sb);
+
+	return (error);
+}
+
+/*
+ * Allocate a zeroed forwarding-table entry.  Uses M_NOWAIT since this
+ * runs in the data path; may return NULL.
+ */
+static struct vxlan_ftable_entry *
+vxlan_ftable_entry_alloc(void)
+{
+	struct vxlan_ftable_entry *fe;
+
+	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
+
+	return (fe);
+}
+
+/* Free a forwarding-table entry previously obtained from _entry_alloc(). */
+static void
+vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
+{
+
+	free(fe, M_VXLAN);
+}
+
+/*
+ * Fill in a forwarding-table entry: flags, expiration stamp relative to
+ * the softc's configured timeout, MAC and remote address.
+ */
+static void
+vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
+    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
+{
+
+	fe->vxlfe_flags = flags;
+	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
+	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
+	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
+}
+
+/*
+ * Unlink an entry from its hash bucket, drop the table count, and free
+ * it.  Caller holds the softc write lock (per the list manipulation).
+ */
+static void
+vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
+    struct vxlan_ftable_entry *fe)
+{
+
+	sc->vxl_ftable_cnt--;
+	LIST_REMOVE(fe, vxlfe_hash);
+	vxlan_ftable_entry_free(fe);
+}
+
+/*
+ * Insert an entry into its hash bucket, keeping the bucket list sorted
+ * in descending MAC order (the same order _entry_lookup() relies on for
+ * its early-exit).  Returns EEXIST if the MAC is already present.
+ */
+static int
+vxlan_ftable_entry_insert(struct vxlan_softc *sc,
+    struct vxlan_ftable_entry *fe)
+{
+	struct vxlan_ftable_entry *lfe;
+	uint32_t hash;
+	int dir;
+
+	VXLAN_LOCK_WASSERT(sc);
+	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
+
+	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
+	if (lfe == NULL) {
+		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
+		goto out;
+	}
+
+	do {
+		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
+		if (dir == 0)
+			return (EEXIST);
+		if (dir > 0) {
+			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
+			goto out;
+		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
+			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
+			goto out;
+		} else
+			lfe = LIST_NEXT(lfe, vxlfe_hash);
+	} while (lfe != NULL);
+
+out:
+	sc->vxl_ftable_cnt++;
+
+	return (0);
+}
+
+static struct vxlan_ftable_entry *
+vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
+{
+	struct vxlan_ftable_entry *fe;
+	uint32_t hash;
+	int dir;
+
+	VXLAN_LOCK_ASSERT(sc);
+	hash = VXLAN_SC_FTABLE_HASH(sc, mac);
+
+	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
+		dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
+		if (dir == 0)
+			return (fe);
+		if (dir > 0)
+			break;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Append one human-readable line for 'fe' to the sbuf:
+ * type flag, flags byte, MAC, remote address, expiration time.
+ * If the sbuf overflows mid-line, rewind to erase the partial line.
+ */
+static void
+vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
+{
+	char buf[64];
+	const union vxlan_sockaddr *sa;
+	const void *addr;
+	int i, len, af, width;
+
+	sa = &fe->vxlfe_raddr;
+	af = sa->sa.sa_family;
+	len = sbuf_len(sb);	/* Remember start-of-line for rollback. */
+
+	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
+	    fe->vxlfe_flags);
+
+	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
+		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
+	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
+
+	if (af == AF_INET) {
+		addr = &sa->in4.sin_addr;
+		width = INET_ADDRSTRLEN - 1;
+	} else {
+		addr = &sa->in6.sin6_addr;
+		width = INET6_ADDRSTRLEN - 1;
+	}
+	inet_ntop(af, addr, buf, sizeof(buf));
+	sbuf_printf(sb, "%*s ", width, buf);
+
+	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
+
+	sbuf_putc(sb, '\n');
+
+	/* Truncate a partial line. */
+	if (sbuf_error(sb) != 0)
+		sbuf_setpos(sb, len);
+}
+
+/*
+ * Allocate a vxlan socket wrapper for local address 'sa': init its rm
+ * lock, zero refcount, and empty VNI hash.  May sleep (M_WAITOK), so it
+ * never returns NULL.
+ */
+static struct vxlan_socket *
+vxlan_socket_alloc(const union vxlan_sockaddr *sa)
+{
+	struct vxlan_socket *vso;
+	int i;
+
+	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
+	rm_init(&vso->vxlso_lock, "vxlansorm");
+	refcount_init(&vso->vxlso_refcnt, 0);
+	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
+		LIST_INIT(&vso->vxlso_vni_hash[i]);
+	vso->vxlso_laddr = *sa;
+
+	return (vso);
+}
+
+/*
+ * Tear down a vxlan socket wrapper: assert no multicast groups or VNIs
+ * remain, close the underlying UDP socket, and free the structure.
+ */
+static void
+vxlan_socket_destroy(struct vxlan_socket *vso)
+{
+	struct socket *so;
+	struct vxlan_socket_mc_info *mc;
+	int i;
+
+	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+		mc = &vso->vxlso_mc[i];
+		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
+		    ("%s: socket %p mc[%d] still has address",
+		     __func__, vso, i));
+	}
+
+	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
+		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
+		    ("%s: socket %p vni_hash[%d] not empty",
+		     __func__, vso, i));
+	}
+
+	so = vso->vxlso_sock;
+	if (so != NULL) {
+		vso->vxlso_sock = NULL;
+		soclose(so);
+	}
+
+	rm_destroy(&vso->vxlso_lock);
+	free(vso, M_VXLAN);
+}
+
+/*
+ * Drop a reference on 'vso'; on the last release, unlink it from the
+ * global list (under the list mutex) and destroy it outside the mutex,
+ * since destruction may sleep in soclose().
+ */
+static void
+vxlan_socket_release(struct vxlan_socket *vso)
+{
+	int destroy;
+
+	mtx_lock(&vxlan_list_mtx);
+	destroy = VXLAN_SO_RELEASE(vso);
+	if (destroy != 0)
+		LIST_REMOVE(vso, vxlso_entry);
+	mtx_unlock(&vxlan_list_mtx);
+
+	if (destroy != 0)
+		vxlan_socket_destroy(vso);
+}
+
+/*
+ * Find an existing vxlan socket bound to 'vxlsa' on the global list,
+ * returning it with a new reference, or NULL if none matches.
+ */
+static struct vxlan_socket *
+vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
+{
+	struct vxlan_socket *vso;
+
+	mtx_lock(&vxlan_list_mtx);
+	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
+		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
+			VXLAN_SO_ACQUIRE(vso);
+			break;
+		}
+	}
+	mtx_unlock(&vxlan_list_mtx);
+
+	return (vso);
+}
+
+/*
+ * Add 'vso' to the global socket list, taking the list's reference.
+ */
+static void
+vxlan_socket_insert(struct vxlan_socket *vso)
+{
+
+	mtx_lock(&vxlan_list_mtx);
+	VXLAN_SO_ACQUIRE(vso);
+	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
+	mtx_unlock(&vxlan_list_mtx);
+}
+
+/*
+ * Create and configure the UDP socket used for encapsulation: create it
+ * in the address family of the configured local address, hook it into
+ * the kernel UDP tunneling path so received datagrams are delivered to
+ * vxlan_rcv_udp_packet(), and optionally set SO_REUSEPORT so multiple
+ * vxlan sockets can share a local port.  Returns 0 or an errno value;
+ * on failure the caller is expected to destroy the vxlan socket.
+ */
+static int
+vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
+{
+	struct thread *td;
+	int error;
+
+	td = curthread;
+
+	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
+	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
+	if (error) {
+		if_printf(ifp, "cannot create socket: %d\n", error);
+		return (error);
+	}
+
+	error = udp_set_kernel_tunneling(vso->vxlso_sock,
+	    vxlan_rcv_udp_packet, vso);
+	if (error) {
+		if_printf(ifp, "cannot set tunneling function: %d\n", error);
+		return (error);
+	}
+
+	if (vxlan_reuse_port != 0) {
+		struct sockopt sopt;
+		int val = 1;
+
+		bzero(&sopt, sizeof(sopt));
+		sopt.sopt_dir = SOPT_SET;
+		/*
+		 * SO_REUSEPORT is a socket-level option; requesting it at
+		 * IPPROTO_IP level is rejected by the IP ctloutput path.
+		 */
+		sopt.sopt_level = SOL_SOCKET;
+		sopt.sopt_name = SO_REUSEPORT;
+		sopt.sopt_val = &val;
+		sopt.sopt_valsize = sizeof(val);
+		error = sosetopt(vso->vxlso_sock, &sopt);
+		if (error) {
+			if_printf(ifp,
+			    "cannot set REUSEPORT socket opt: %d\n", error);
+			return (error);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Bind the vxlan UDP socket to its configured local address.  EADDRINUSE
+ * is expected when another vxlan already owns the address and is not
+ * logged; the caller falls back to sharing the existing socket.
+ */
+static int
+vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
+{
+	union vxlan_sockaddr laddr;
+	struct thread *td;
+	int error;
+
+	td = curthread;
+	laddr = vso->vxlso_laddr;
+
+	error = sobind(vso->vxlso_sock, &laddr.sa, td);
+	if (error) {
+		if (error != EADDRINUSE)
+			if_printf(ifp, "cannot bind socket: %d\n", error);
+		return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate, initialize, bind and publish a new vxlan socket for 'saddr'.
+ * For multicast use, the bind address is wildcarded so only the local
+ * port is significant.  On success *vsop holds the referenced socket;
+ * on failure the partially constructed socket is destroyed.
+ */
+static int
+vxlan_socket_create(struct ifnet *ifp, int multicast,
+    const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
+{
+	union vxlan_sockaddr laddr;
+	struct vxlan_socket *vso;
+	int error;
+
+	laddr = *saddr;
+
+	/*
+	 * If this socket will be multicast, then only the local port
+	 * must be specified when binding.
+	 */
+	if (multicast != 0) {
+		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
+			laddr.in4.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+		else
+			laddr.in6.sin6_addr = in6addr_any;
+#endif
+	}
+
+	vso = vxlan_socket_alloc(&laddr);
+	if (vso == NULL)
+		return (ENOMEM);
+
+	error = vxlan_socket_init(vso, ifp);
+	if (error)
+		goto fail;
+
+	error = vxlan_socket_bind(vso, ifp);
+	if (error)
+		goto fail;
+
+	/*
+	 * There is a small window between the bind completing and
+	 * inserting the socket, so that a concurrent create may fail.
+	 * Let's not worry about that for now.
+	 */
+	vxlan_socket_insert(vso);
+	*vsop = vso;
+
+	return (0);
+
+fail:
+	vxlan_socket_destroy(vso);
+
+	return (error);
+}
+
+/*
+ * Notify every interface hashed on this socket that 'ifp' is going
+ * away; interfaces scheduled for teardown are collected on 'list'.
+ */
+static void
+vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
+    struct vxlan_softc_head *list)
+{
+	struct rm_priotracker tracker;
+	struct vxlan_softc *sc;
+	int i;
+
+	VXLAN_SO_RLOCK(vso, &tracker);
+	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
+		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
+			vxlan_ifdetach(sc, ifp, list);
+	}
+	VXLAN_SO_RUNLOCK(vso, &tracker);
+}
+
+/*
+ * Find an existing multicast-capable vxlan socket matching 'vxlsa'.
+ * Multicast sockets are bound with a wildcard address (see
+ * vxlan_socket_create()), so wildcard the lookup key the same way.
+ */
+static struct vxlan_socket *
+vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
+{
+	struct vxlan_socket *vso;
+	union vxlan_sockaddr laddr;
+
+	laddr = *vxlsa;
+
+	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
+		laddr.in4.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+	else
+		laddr.in6.sin6_addr = in6addr_any;
+#endif
+
+	vso = vxlan_socket_lookup(&laddr);
+
+	return (vso);
+}
+
+/*
+ * Return non-zero when the recorded multicast membership 'mc' matches
+ * the requested group/local address/interface index.  A wildcard local
+ * address or a zero ifidx matches anything.
+ */
+static int
+vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
+    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+    int ifidx)
+{
+
+	if (!vxlan_sockaddr_in_any(local) &&
+	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
+		return (0);
+	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
+		return (0);
+	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Join the multicast group 'group' on the vxlan socket, using 'local'
+ * (IPv4) or '*ifidx' (IPv6) to select the interface.  On success
+ * '*source' records the effective source address for later matching and
+ * leave operations.  Returns 0 or an errno value.
+ */
+static int
+vxlan_socket_mc_join_group(struct vxlan_socket *vso,
+    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+    int *ifidx, union vxlan_sockaddr *source)
+{
+	struct sockopt sopt;
+	int error;
+
+	*source = *local;
+
+	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+		struct ip_mreq mreq;
+
+		mreq.imr_multiaddr = group->in4.sin_addr;
+		mreq.imr_interface = local->in4.sin_addr;
+
+		bzero(&sopt, sizeof(sopt));
+		sopt.sopt_dir = SOPT_SET;
+		sopt.sopt_level = IPPROTO_IP;
+		sopt.sopt_name = IP_ADD_MEMBERSHIP;
+		sopt.sopt_val = &mreq;
+		sopt.sopt_valsize = sizeof(mreq);
+		error = sosetopt(vso->vxlso_sock, &sopt);
+		if (error)
+			return (error);
+
+		/*
+		 * BMV: Ideally, there would be a formal way for us to get
+		 * the local interface that was selected based on the
+		 * imr_interface address. We could then update *ifidx so
+		 * vxlan_sockaddr_mc_info_match() would return a match for
+		 * later creates that explicitly set the multicast interface.
+		 *
+		 * If we really need to, we can of course look in the INP's
+		 * membership list:
+		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
+		 *         imo_membership[]->inm_ifp
+		 * similarly to imo_match_group().
+		 */
+		source->in4.sin_addr = local->in4.sin_addr;
+
+	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+		struct ipv6_mreq mreq;
+
+		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
+		mreq.ipv6mr_interface = *ifidx;
+
+		bzero(&sopt, sizeof(sopt));
+		sopt.sopt_dir = SOPT_SET;
+		sopt.sopt_level = IPPROTO_IPV6;
+		sopt.sopt_name = IPV6_JOIN_GROUP;
+		sopt.sopt_val = &mreq;
+		sopt.sopt_valsize = sizeof(mreq);
+		error = sosetopt(vso->vxlso_sock, &sopt);
+		if (error)
+			return (error);
+
+		/*
+		 * BMV: As with IPv4, we would really like to know what
+		 * interface in6p_lookup_mcast_ifp() selected.
+		 */
+	} else
+		error = EAFNOSUPPORT;
+
+	return (error);
+}
+
+/*
+ * Leave the multicast group 'group' previously joined with 'source'
+ * (IPv4) or 'ifidx' (IPv6).  Returns 0 or an errno value.
+ */
+static int
+vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
+    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
+    int ifidx)
+{
+	struct sockopt sopt;
+	int error;
+
+	bzero(&sopt, sizeof(sopt));
+	sopt.sopt_dir = SOPT_SET;
+
+	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+		struct ip_mreq mreq;
+
+		mreq.imr_multiaddr = group->in4.sin_addr;
+		mreq.imr_interface = source->in4.sin_addr;
+
+		sopt.sopt_level = IPPROTO_IP;
+		sopt.sopt_name = IP_DROP_MEMBERSHIP;
+		sopt.sopt_val = &mreq;
+		sopt.sopt_valsize = sizeof(mreq);
+		error = sosetopt(vso->vxlso_sock, &sopt);
+
+	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+		struct ipv6_mreq mreq;
+
+		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
+		mreq.ipv6mr_interface = ifidx;
+
+		sopt.sopt_level = IPPROTO_IPV6;
+		sopt.sopt_name = IPV6_LEAVE_GROUP;
+		sopt.sopt_val = &mreq;
+		sopt.sopt_valsize = sizeof(mreq);
+		error = sosetopt(vso->vxlso_sock, &sopt);
+
+	} else
+		error = EAFNOSUPPORT;
+
+	return (error);
+}
+
+/*
+ * Record (and, for the first user, perform) membership in a multicast
+ * group on this shared socket.  On success '*idx' is the slot index the
+ * caller later passes to _mc_release_group_by_idx().  Returns ENOSPC
+ * when all membership slots are taken, or an errno from the join.
+ */
+static int
+vxlan_socket_mc_add_group(struct vxlan_socket *vso,
+    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+    int ifidx, int *idx)
+{
+	union vxlan_sockaddr source;
+	struct vxlan_socket_mc_info *mc;
+	int i, empty, error;
+
+	/*
+	 * Within a socket, the same multicast group may be used by multiple
+	 * interfaces, each with a different network identifier. But a socket
+	 * may only join a multicast group once, so keep track of the users
+	 * here.
+	 */
+
+	VXLAN_SO_WLOCK(vso);
+	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+		mc = &vso->vxlso_mc[i];
+
+		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+			empty++;
+			continue;
+		}
+
+		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
+			goto out;
+	}
+	VXLAN_SO_WUNLOCK(vso);
+
+	if (empty == 0)
+		return (ENOSPC);
+
+	/* Join outside the lock; sosetopt() may sleep. */
+	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
+	if (error)
+		return (error);
+
+	/* Re-scan for a free slot; one may have been taken concurrently. */
+	VXLAN_SO_WLOCK(vso);
+	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+		mc = &vso->vxlso_mc[i];
+
+		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
+			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
+			mc->vxlsomc_ifidx = ifidx;
+			goto out;
+		}
+	}
+	VXLAN_SO_WUNLOCK(vso);
+
+	/* No slot left: undo the join we just performed. */
+	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
+	MPASS(error == 0);
+
+	return (ENOSPC);
+
+out:
+	mc->vxlsomc_users++;
+	VXLAN_SO_WUNLOCK(vso);
+
+	*idx = i;
+
+	return (0);
+}
+
+/*
+ * Drop one user from the multicast membership slot 'idx'; when the last
+ * user leaves, clear the slot and leave the group (outside the lock, as
+ * the leave may sleep).
+ */
+static void
+vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
+{
+	union vxlan_sockaddr group, source;
+	struct vxlan_socket_mc_info *mc;
+	int ifidx, leave;
+
+	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
+	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
+
+	leave = 0;
+	mc = &vso->vxlso_mc[idx];
+
+	VXLAN_SO_WLOCK(vso);
+	mc->vxlsomc_users--;
+	if (mc->vxlsomc_users == 0) {
+		group = mc->vxlsomc_gaddr;
+		source = mc->vxlsomc_saddr;
+		ifidx = mc->vxlsomc_ifidx;
+		bzero(mc, sizeof(*mc));
+		leave = 1;
+	}
+	VXLAN_SO_WUNLOCK(vso);
+
+	if (leave != 0) {
+		/*
+		 * Our socket's membership in this group may have already
+		 * been removed if we joined through an interface that's
+		 * been detached.
+		 */
+		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
+	}
+}
+
+/*
+ * Find the interface registered for 'vni' on this socket, returning it
+ * with a new reference, or NULL.  Caller holds the socket lock.
+ */
+static struct vxlan_softc *
+vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
+{
+	struct vxlan_softc *sc;
+	uint32_t hash;
+
+	VXLAN_SO_LOCK_ASSERT(vso);
+	hash = VXLAN_SO_VNI_HASH(vni);
+
+	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
+		if (sc->vxl_vni == vni) {
+			VXLAN_ACQUIRE(sc);
+			break;
+		}
+	}
+
+	return (sc);
+}
+
+/*
+ * Locking wrapper for vxlan_socket_lookup_softc_locked().
+ */
+static struct vxlan_softc *
+vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
+{
+	struct rm_priotracker tracker;
+	struct vxlan_softc *sc;
+
+	VXLAN_SO_RLOCK(vso, &tracker);
+	sc = vxlan_socket_lookup_softc_locked(vso, vni);
+	VXLAN_SO_RUNLOCK(vso, &tracker);
+
+	return (sc);
+}
+
+/*
+ * Register 'sc' on the socket's VNI hash.  Fails with EEXIST when
+ * another interface already owns the same network identifier on this
+ * socket; the duplicate's reference taken by the lookup is dropped.
+ */
+static int
+vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
+{
+	struct vxlan_softc *tsc;
+	uint32_t vni, hash;
+
+	vni = sc->vxl_vni;
+	hash = VXLAN_SO_VNI_HASH(vni);
+
+	VXLAN_SO_WLOCK(vso);
+	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
+	if (tsc != NULL) {
+		VXLAN_SO_WUNLOCK(vso);
+		vxlan_release(tsc);
+		return (EEXIST);
+	}
+
+	VXLAN_ACQUIRE(sc);
+	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
+	VXLAN_SO_WUNLOCK(vso);
+
+	return (0);
+}
+
+/*
+ * Unregister 'sc' from the socket's VNI hash and drop the reference
+ * taken by vxlan_socket_insert_softc().
+ */
+static void
+vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
+{
+
+	VXLAN_SO_WLOCK(vso);
+	LIST_REMOVE(sc, vxl_entry);
+	VXLAN_SO_WUNLOCK(vso);
+
+	vxlan_release(sc);
+}
+
+/*
+ * Return a referenced pointer to the configured multicast interface for
+ * the requested address family, or NULL when none is set.  The caller
+ * must if_rele() the result.
+ */
+static struct ifnet *
+vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
+{
+	struct ifnet *ifp;
+
+	VXLAN_LOCK_ASSERT(sc);
+
+	if (ipv4 && sc->vxl_im4o != NULL)
+		ifp = sc->vxl_im4o->imo_multicast_ifp;
+	else if (!ipv4 && sc->vxl_im6o != NULL)
+		ifp = sc->vxl_im6o->im6o_multicast_ifp;
+	else
+		ifp = NULL;
+
+	if (ifp != NULL)
+		if_ref(ifp);
+
+	return (ifp);
+}
+
+/*
+ * Release all multicast state: the referenced multicast interface and
+ * the per-family multicast options structures.
+ */
+static void
+vxlan_free_multicast(struct vxlan_softc *sc)
+{
+
+	if (sc->vxl_mc_ifp != NULL) {
+		if_rele(sc->vxl_mc_ifp);
+		sc->vxl_mc_ifp = NULL;
+		sc->vxl_mc_ifindex = 0;
+	}
+
+	if (sc->vxl_im4o != NULL) {
+		free(sc->vxl_im4o, M_VXLAN);
+		sc->vxl_im4o = NULL;
+	}
+
+	if (sc->vxl_im6o != NULL) {
+		free(sc->vxl_im6o, M_VXLAN);
+		sc->vxl_im6o = NULL;
+	}
+}
+
+/*
+ * Resolve the configured multicast interface name to a referenced
+ * ifnet and record its index.  Returns ENOENT when the interface does
+ * not exist, or ENOTSUP when it is not multicast capable.  The
+ * reference is released later by vxlan_free_multicast().
+ */
+static int
+vxlan_setup_multicast_interface(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+
+	ifp = ifunit_ref(sc->vxl_mc_ifname);
+	if (ifp == NULL) {
+		if_printf(sc->vxl_ifp, "multicast interface %s does "
+		    "not exist\n", sc->vxl_mc_ifname);
+		return (ENOENT);
+	}
+
+	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+		if_printf(sc->vxl_ifp, "interface %s does not support "
+		     "multicast\n", sc->vxl_mc_ifname);
+		if_rele(ifp);
+		return (ENOTSUP);
+	}
+
+	sc->vxl_mc_ifp = ifp;
+	sc->vxl_mc_ifindex = ifp->if_index;
+
+	return (0);
+}
+
+/*
+ * Prepare per-interface multicast transmit state for the configured
+ * group destination: resolve the optional multicast interface and build
+ * the family-specific ip(6)_moptions used by the output path.
+ */
+static int
+vxlan_setup_multicast(struct vxlan_softc *sc)
+{
+	const union vxlan_sockaddr *group;
+	int error;
+
+	group = &sc->vxl_dst_addr;
+	error = 0;
+
+	if (sc->vxl_mc_ifname[0] != '\0') {
+		error = vxlan_setup_multicast_interface(sc);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Initialize an multicast options structure that is sufficiently
+	 * populated for use in the respective IP output routine. This
+	 * structure is typically stored in the socket, but our sockets
+	 * may be shared among multiple interfaces.
+	 */
+	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
+		    M_ZERO | M_WAITOK);
+		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
+		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
+		sc->vxl_im4o->imo_multicast_vif = -1;
+	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
+		    M_ZERO | M_WAITOK);
+		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
+		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
+	}
+
+	return (error);
+}
+
+/*
+ * Acquire the vxlan socket for this interface: create one bound to the
+ * local address or share an existing one, join the multicast group when
+ * the destination is a group address, and register our VNI on the
+ * socket.  On any failure all partially acquired state is rolled back.
+ */
+static int
+vxlan_setup_socket(struct vxlan_softc *sc)
+{
+	struct vxlan_socket *vso;
+	struct ifnet *ifp;
+	union vxlan_sockaddr *saddr, *daddr;
+	int multicast, error;
+
+	vso = NULL;
+	ifp = sc->vxl_ifp;
+	saddr = &sc->vxl_src_addr;
+	daddr = &sc->vxl_dst_addr;
+
+	multicast = vxlan_sockaddr_in_multicast(daddr);
+	MPASS(multicast != -1);
+	sc->vxl_vso_mc_index = -1;
+
+	/*
+	 * Try to create the socket. If that fails, attempt to use an
+	 * existing socket.
+	 */
+	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
+	if (error) {
+		if (multicast != 0)
+			vso = vxlan_socket_mc_lookup(saddr);
+		else
+			vso = vxlan_socket_lookup(saddr);
+
+		if (vso == NULL) {
+			if_printf(ifp, "cannot create socket (error: %d), "
+			    "and no existing socket found\n", error);
+			goto out;
+		}
+	}
+
+	if (multicast != 0) {
+		error = vxlan_setup_multicast(sc);
+		if (error)
+			goto out;
+
+		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
+		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
+		if (error)
+			goto out;
+	}
+
+	sc->vxl_sock = vso;
+	error = vxlan_socket_insert_softc(vso, sc);
+	if (error) {
+		sc->vxl_sock = NULL;
+		if_printf(ifp, "network identifier %d already exists in "
+		    "this socket\n", sc->vxl_vni);
+		goto out;
+	}
+
+	return (0);
+
+out:
+	if (vso != NULL) {
+		if (sc->vxl_vso_mc_index != -1) {
+			vxlan_socket_mc_release_group_by_idx(vso,
+			    sc->vxl_vso_mc_index);
+			sc->vxl_vso_mc_index = -1;
+		}
+		if (multicast != 0)
+			vxlan_free_multicast(sc);
+		vxlan_socket_release(vso);
+	}
+
+	return (error);
+}
+
+/*
+ * Set the interface header length to account for the full
+ * encapsulation overhead: Ethernet + (IPv4 or IPv6) + UDP + vxlan.
+ */
+static void
+vxlan_setup_interface(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+
+	ifp = sc->vxl_ifp;
+	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
+
+	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
+		ifp->if_hdrlen += sizeof(struct ip);
+	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
+		ifp->if_hdrlen += sizeof(struct ip6_hdr);
+}
+
+/*
+ * Validate the configuration before bringing the interface up: VNI in
+ * range, supported and consistent address families, a concrete
+ * destination, ports set, and a multicast interface only with a group
+ * destination.  Logs the reason and returns EINVAL on failure.
+ */
+static int
+vxlan_valid_init_config(struct vxlan_softc *sc)
+{
+	const char *reason;
+
+	if (vxlan_check_vni(sc->vxl_vni) != 0) {
+		reason = "invalid virtual network identifier specified";
+		goto fail;
+	}
+
+	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
+		reason = "source address type is not supported";
+		goto fail;
+	}
+
+	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
+		reason = "destination address type is not supported";
+		goto fail;
+	}
+
+	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
+		reason = "no valid destination address specified";
+		goto fail;
+	}
+
+	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
+	    sc->vxl_mc_ifname[0] != '\0') {
+		reason = "can only specify interface with a group address";
+		goto fail;
+	}
+
+	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
+		/* XOR: exactly one of the two addresses is IPv4. */
+		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
+		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
+			reason = "source and destination address must both "
+			    "be either IPv4 or IPv6";
+			goto fail;
+		}
+	}
+
+	if (sc->vxl_src_addr.in4.sin_port == 0) {
+		reason = "local port not specified";
+		goto fail;
+	}
+
+	if (sc->vxl_dst_addr.in4.sin_port == 0) {
+		reason = "remote port not specified";
+		goto fail;
+	}
+
+	return (0);
+
+fail:
+	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
+	return (EINVAL);
+}
+
+/*
+ * Sleep until a concurrent vxlan_init() completes.  Called with the
+ * write lock held; the lock is dropped while sleeping by rm_sleep().
+ */
+static void
+vxlan_init_wait(struct vxlan_softc *sc)
+{
+
+	VXLAN_LOCK_WASSERT(sc);
+	while (sc->vxl_flags & VXLAN_FLAG_INIT)
+		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
+}
+
+/*
+ * Clear the in-progress init flag and wake any vxlan_init_wait()ers.
+ */
+static void
+vxlan_init_complete(struct vxlan_softc *sc)
+{
+
+	VXLAN_WLOCK(sc);
+	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
+	wakeup(sc);
+	VXLAN_WUNLOCK(sc);
+}
+
+/*
+ * if_init handler: validate the configuration, compute header sizes,
+ * acquire the encapsulation socket, seed the default (all-zero MAC)
+ * forwarding entry, mark the interface running, and start the pruning
+ * timer.  VXLAN_FLAG_INIT serializes against concurrent initializers.
+ */
+static void
+vxlan_init(void *xsc)
+{
+	static const uint8_t empty_mac[ETHER_ADDR_LEN];
+	struct vxlan_softc *sc;
+	struct ifnet *ifp;
+
+	sc = xsc;
+	ifp = sc->vxl_ifp;
+
+	VXLAN_WLOCK(sc);
+	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+		VXLAN_WUNLOCK(sc);
+		return;
+	}
+	sc->vxl_flags |= VXLAN_FLAG_INIT;
+	VXLAN_WUNLOCK(sc);
+
+	if (vxlan_valid_init_config(sc) != 0)
+		goto out;
+
+	vxlan_setup_interface(sc);
+
+	if (vxlan_setup_socket(sc) != 0)
+		goto out;
+
+	/* Initialize the default forwarding entry. */
+	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
+	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
+
+	VXLAN_WLOCK(sc);
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
+	    vxlan_timer, sc);
+	VXLAN_WUNLOCK(sc);
+
+out:
+	vxlan_init_complete(sc);
+}
+
+/*
+ * Drop a reference on the softc; the final release wakes any thread
+ * waiting for the refcount to drain (see vxlan_teardown_locked()).
+ */
+static void
+vxlan_release(struct vxlan_softc *sc)
+{
+
+	/*
+	 * The softc may be destroyed as soon as we release our reference,
+	 * so we cannot serialize the wakeup with the softc lock. We use a
+	 * timeout in our sleeps so a missed wakeup is unfortunate but not
+	 * fatal.
+	 */
+	if (VXLAN_RELEASE(sc) != 0)
+		wakeup(sc);
+}
+
+/*
+ * Sleep until a concurrent teardown completes.  Called with the write
+ * lock held; rm_sleep() drops it while sleeping.
+ */
+static void
+vxlan_teardown_wait(struct vxlan_softc *sc)
+{
+
+	VXLAN_LOCK_WASSERT(sc);
+	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
+		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
+}
+
+/*
+ * Clear the in-progress teardown flag and wake any waiters.
+ */
+static void
+vxlan_teardown_complete(struct vxlan_softc *sc)
+{
+
+	VXLAN_WLOCK(sc);
+	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
+	wakeup(sc);
+	VXLAN_WUNLOCK(sc);
+}
+
+/*
+ * Stop the interface: mark it down, cancel the timer, detach from the
+ * encapsulation socket (outside the softc lock, since socket teardown
+ * may sleep), wait for outstanding references to drain, then release
+ * multicast state and the socket.  Entered with the write lock held and
+ * VXLAN_FLAG_TEARDOWN set; returns with the lock released.
+ */
+static void
+vxlan_teardown_locked(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+	struct vxlan_socket *vso;
+
+	ifp = sc->vxl_ifp;
+
+	VXLAN_LOCK_WASSERT(sc);
+	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
+
+	ifp->if_flags &= ~IFF_UP;
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+	callout_stop(&sc->vxl_callout);
+	vso = sc->vxl_sock;
+	sc->vxl_sock = NULL;
+
+	VXLAN_WUNLOCK(sc);
+
+	if (vso != NULL) {
+		vxlan_socket_remove_softc(vso, sc);
+
+		if (sc->vxl_vso_mc_index != -1) {
+			vxlan_socket_mc_release_group_by_idx(vso,
+			    sc->vxl_vso_mc_index);
+			sc->vxl_vso_mc_index = -1;
+		}
+	}
+
+	/* Wait for in-flight users of the softc to drain. */
+	VXLAN_WLOCK(sc);
+	while (sc->vxl_refcnt != 0)
+		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
+	VXLAN_WUNLOCK(sc);
+
+	callout_drain(&sc->vxl_callout);
+
+	vxlan_free_multicast(sc);
+	if (vso != NULL)
+		vxlan_socket_release(vso);
+
+	vxlan_teardown_complete(sc);
+}
+
+/*
+ * Serialize teardown: if one is already in progress, just wait for it;
+ * otherwise claim the flag and perform it.  vxlan_teardown_locked()
+ * consumes the write lock taken here.
+ */
+static void
+vxlan_teardown(struct vxlan_softc *sc)
+{
+
+	VXLAN_WLOCK(sc);
+	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
+		vxlan_teardown_wait(sc);
+		VXLAN_WUNLOCK(sc);
+		return;
+	}
+
+	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
+	vxlan_teardown_locked(sc);
+}
+
+/*
+ * React to the departure of 'ifp': if it is our multicast interface and
+ * no teardown is already pending, claim the teardown flag and queue the
+ * softc on 'list' for the caller to tear down later (we may be called
+ * in a context where the full teardown cannot run).
+ */
+static void
+vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
+    struct vxlan_softc_head *list)
+{
+
+	VXLAN_WLOCK(sc);
+
+	if (sc->vxl_mc_ifp != ifp)
+		goto out;
+	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
+		goto out;
+
+	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
+	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
+
+out:
+	VXLAN_WUNLOCK(sc);
+}
+
+/*
+ * Periodic callout: expire stale learned forwarding entries and
+ * reschedule.  The lock assert holds because the callout is run with
+ * the softc lock (per the reschedule without explicit locking here) —
+ * NOTE(review): assumes callout_init_rm() association; confirm at the
+ * callout initialization site, which is outside this chunk.
+ */
+static void
+vxlan_timer(void *xsc)
+{
+	struct vxlan_softc *sc;
+
+	sc = xsc;
+	VXLAN_LOCK_WASSERT(sc);
+
+	vxlan_ftable_expire(sc);
+	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
+}
+
+/*
+ * Handle SIOCSIFFLAGS: bring the interface up or tear it down to match
+ * the administrative IFF_UP flag.  Always returns 0.
+ */
+static int
+vxlan_ioctl_ifflags(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+
+	ifp = sc->vxl_ifp;
+
+	if (ifp->if_flags & IFF_UP) {
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+			vxlan_init(sc);
+	} else {
+		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+			vxlan_teardown(sc);
+	}
+
+	return (0);
+}
+
+/*
+ * VXLAN_CMD_GET_CONFIG handler: snapshot the interface configuration
+ * into the user-visible ifvxlancfg structure under the read lock.
+ */
+static int
+vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
+{
+	struct rm_priotracker tracker;
+	struct ifvxlancfg *cfg;
+
+	cfg = arg;
+	bzero(cfg, sizeof(*cfg));
+
+	VXLAN_RLOCK(sc, &tracker);
+	cfg->vxlc_vni = sc->vxl_vni;
+	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
+	    sizeof(union vxlan_sockaddr));
+	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
+	    sizeof(union vxlan_sockaddr));
+	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
+	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
+	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
+	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
+	cfg->vxlc_port_min = sc->vxl_min_port;
+	cfg->vxlc_port_max = sc->vxl_max_port;
+	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
+	cfg->vxlc_ttl = sc->vxl_ttl;
+	VXLAN_RUNLOCK(sc, &tracker);
+
+	return (0);
+}
+
+/*
+ * VXLAN_CMD_SET_VNI handler: validate and store a new network
+ * identifier.  Fails with EBUSY while the interface is running.
+ */
+static int
+vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_vni = cmd->vxlcmd_vni;
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_LOCAL_ADDR: set the local (source) tunnel address.
+ * Must be a unicast IPv4/IPv6 address; only the address portion is
+ * copied, the configured local port is preserved.
+ */
+static int
+vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	union vxlan_sockaddr *vxlsa;
+	int error;
+
+	cmd = arg;
+	vxlsa = &cmd->vxlcmd_sa;
+
+	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
+		return (EINVAL);
+	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_REMOTE_ADDR: set the remote (destination) tunnel address.
+ * Unlike the local address, multicast destinations are permitted here
+ * since the default destination may be a multicast group.
+ */
+static int
+vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	union vxlan_sockaddr *vxlsa;
+	int error;
+
+	cmd = arg;
+	vxlsa = &cmd->vxlcmd_sa;
+
+	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_LOCAL_PORT: set the local UDP port.  Writing through
+ * the in4 member works for both address families because sin_port and
+ * sin6_port occupy the same offset within the sockaddr union.
+ */
+static int
+vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	if (cmd->vxlcmd_port == 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_REMOTE_PORT: set the destination UDP port.  As with the
+ * local port, the in4 member is used for both families since the port
+ * fields of sockaddr_in and sockaddr_in6 overlap in the union.
+ */
+static int
+vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	if (cmd->vxlcmd_port == 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_PORT_RANGE: set the inclusive [min, max] range used by
+ * vxlan_pick_source_port() when hashing flows onto source ports.
+ */
+static int
+vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	uint16_t min, max;
+	int error;
+
+	cmd = arg;
+	min = cmd->vxlcmd_port_min;
+	max = cmd->vxlcmd_port_max;
+
+	if (max < min)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_min_port = min;
+		sc->vxl_max_port = max;
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_FTABLE_TIMEOUT: set the learned-entry expiry time.  This
+ * may be changed while the interface is running; only range-checked.
+ */
+static int
+vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
+		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
+		error = 0;
+	} else
+		error = EINVAL;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_FTABLE_MAX: set the forwarding table entry limit.  Like
+ * the timeout, this is adjustable at runtime after a range check.
+ */
+static int
+vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
+		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
+		error = 0;
+	} else
+		error = EINVAL;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_MULTICAST_IF: record the name of the interface to use
+ * for multicast; the name is resolved to an ifnet at init time.
+ */
+static int
+vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_TTL: set the outer IP TTL / IPv6 hop limit, and push
+ * the new value into any existing multicast options so it also applies
+ * to multicast transmissions immediately.
+ */
+static int
+vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
+		sc->vxl_ttl = cmd->vxlcmd_ttl;
+		if (sc->vxl_im4o != NULL)
+			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
+		if (sc->vxl_im6o != NULL)
+			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
+		error = 0;
+	} else
+		error = EINVAL;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_SET_LEARN: enable or disable source-address learning in the
+ * forwarding table based on the command flag.
+ */
+static int
+vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
+		sc->vxl_flags |= VXLAN_FLAG_LEARN;
+	else
+		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
+	VXLAN_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * VXLAN_CMD_FTABLE_ENTRY_ADD: install a static forwarding table entry
+ * mapping a MAC address to a remote tunnel endpoint.  The endpoint must
+ * be a specific unicast address of the same family as the configured
+ * destination.  The entry is allocated before taking the lock since
+ * allocation may sleep; it is freed again if the insert fails.
+ */
+static int
+vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
+{
+	union vxlan_sockaddr vxlsa;
+	struct ifvxlancmd *cmd;
+	struct vxlan_ftable_entry *fe;
+	int error;
+
+	cmd = arg;
+	vxlsa = cmd->vxlcmd_sa;
+
+	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
+		return (EINVAL);
+	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
+		return (EINVAL);
+	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
+		return (EINVAL);
+	/* BMV: We could support both IPv4 and IPv6 later. */
+	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
+		return (EAFNOSUPPORT);
+
+	fe = vxlan_ftable_entry_alloc();
+	if (fe == NULL)
+		return (ENOMEM);
+
+	/* Default to the configured destination port if none was given. */
+	if (vxlsa.in4.sin_port == 0)
+		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
+
+	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
+	    VXLAN_FE_FLAG_STATIC);
+
+	VXLAN_WLOCK(sc);
+	error = vxlan_ftable_entry_insert(sc, fe);
+	VXLAN_WUNLOCK(sc);
+
+	if (error)
+		vxlan_ftable_entry_free(fe);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_FTABLE_ENTRY_REM: remove the forwarding table entry for the
+ * given MAC address, if one exists.
+ */
+static int
+vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	struct vxlan_ftable_entry *fe;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
+	if (fe != NULL) {
+		vxlan_ftable_entry_destroy(sc, fe);
+		error = 0;
+	} else
+		error = ENOENT;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * VXLAN_CMD_FLUSH: purge forwarding table entries; with FLUSH_ALL set,
+ * static entries are removed as well as learned ones.
+ */
+static int
+vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int all;
+
+	cmd = arg;
+	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
+
+	VXLAN_WLOCK(sc);
+	vxlan_ftable_flush(sc, all);
+	VXLAN_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Dispatch a SIOCGDRVSPEC/SIOCSDRVSPEC request through the control
+ * table.  Validates the command index, direction (get vs. set must match
+ * the command's COPYOUT flag), privilege, and argument size before
+ * copying the user buffer in, invoking the handler, and copying results
+ * back out when requested.
+ */
+static int
+vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
+{
+	const struct vxlan_control *vc;
+	union {
+		struct ifvxlancfg	cfg;
+		struct ifvxlancmd	cmd;
+	} args;
+	int out, error;
+
+	if (ifd->ifd_cmd >= vxlan_control_table_size)
+		return (EINVAL);
+
+	bzero(&args, sizeof(args));
+	vc = &vxlan_control_table[ifd->ifd_cmd];
+	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
+
+	/* A get request must be a copy-out command and vice versa. */
+	if ((get != 0 && out == 0) || (get == 0 && out != 0))
+		return (EINVAL);
+
+	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
+		error = priv_check(curthread, PRIV_NET_VXLAN);
+		if (error)
+			return (error);
+	}
+
+	/* Exact size match guards both copyin and copyout below. */
+	if (ifd->ifd_len != vc->vxlc_argsize ||
+	    ifd->ifd_len > sizeof(args))
+		return (EINVAL);
+
+	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
+		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
+		if (error)
+			return (error);
+	}
+
+	error = vc->vxlc_func(sc, &args);
+	if (error)
+		return (error);
+
+	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
+		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Interface ioctl entry point.  Driver-specific requests go through the
+ * control table; multicast list changes are accepted as no-ops; anything
+ * unrecognized falls through to the generic Ethernet handler.
+ */
+static int
+vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct vxlan_softc *sc;
+	struct ifreq *ifr;
+	struct ifdrv *ifd;
+	int error;
+
+	sc = ifp->if_softc;
+	ifr = (struct ifreq *) data;
+	ifd = (struct ifdrv *) data;
+
+	switch (cmd) {
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		error = 0;
+		break;
+
+	case SIOCGDRVSPEC:
+	case SIOCSDRVSPEC:
+		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
+		break;
+
+	case SIOCSIFFLAGS:
+		error = vxlan_ioctl_ifflags(sc);
+		break;
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+
+	return (error);
+}
+
+#if defined(INET) || defined(INET6)
+/*
+ * Choose a UDP source port in [vxl_min_port, vxl_max_port] for this
+ * frame.  Prefer the mbuf's flow id so packets of a flow stay on one
+ * port; otherwise hash the inner Ethernet header.  Assumes the first
+ * ETHER_HDR_LEN bytes are contiguous in the first mbuf -- TODO confirm
+ * against callers.
+ */
+static uint16_t
+vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
+{
+	int range;
+	uint32_t hash;
+
+	range = sc->vxl_max_port - sc->vxl_min_port + 1;
+
+	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE &&
+	    M_HASHTYPE_GET(m) != M_HASHTYPE_OPAQUE)
+		hash = m->m_pkthdr.flowid;
+	else
+		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
+		    sc->vxl_port_hash_key);
+
+	return (sc->vxl_min_port + (hash % range));
+}
+
+/*
+ * Fill in the UDP and VXLAN headers at 'ipoff' bytes into the mbuf.
+ * The UDP checksum is left zero here; IPv6 output computes it later.
+ * Ports are expected in network byte order.
+ */
+static void
+vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
+    uint16_t srcport, uint16_t dstport)
+{
+	struct vxlanudphdr *hdr;
+	struct udphdr *udph;
+	struct vxlan_header *vxh;
+	int len;
+
+	len = m->m_pkthdr.len - ipoff;
+	MPASS(len >= sizeof(struct vxlanudphdr));
+	hdr = mtodo(m, ipoff);
+
+	udph = &hdr->vxlh_udp;
+	udph->uh_sport = srcport;
+	udph->uh_dport = dstport;
+	udph->uh_ulen = htons(len);
+	udph->uh_sum = 0;
+
+	vxh = &hdr->vxlh_hdr;
+	vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
+	vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
+}
+#endif
+
+/*
+ * Encapsulate the Ethernet frame in IPv4+UDP+VXLAN headers and hand it
+ * to ip_output().  M_PREPEND may free the mbuf on failure, hence the
+ * NULL check.  The inner frame's M_MCAST/M_BCAST flags are cleared so
+ * they describe the outer (unicast or multicast) packet, not the inner
+ * one.  Returns ENOTSUP if the kernel lacks INET.
+ */
+static int
+vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
+    struct mbuf *m)
+{
+#ifdef INET
+	struct ifnet *ifp;
+	struct ip *ip;
+	struct in_addr srcaddr, dstaddr;
+	uint16_t srcport, dstport;
+	int len, mcast, error;
+
+	ifp = sc->vxl_ifp;
+	srcaddr = sc->vxl_src_addr.in4.sin_addr;
+	srcport = vxlan_pick_source_port(sc, m);
+	dstaddr = fvxlsa->in4.sin_addr;
+	dstport = fvxlsa->in4.sin_port;
+
+	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
+	    M_NOWAIT);
+	if (m == NULL) {
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENOBUFS);
+	}
+
+	len = m->m_pkthdr.len;
+
+	ip = mtod(m, struct ip *);
+	ip->ip_tos = 0;
+	ip->ip_len = htons(len);
+	ip->ip_off = 0;
+	ip->ip_ttl = sc->vxl_ttl;
+	ip->ip_p = IPPROTO_UDP;
+	ip->ip_sum = 0;
+	ip->ip_src = srcaddr;
+	ip->ip_dst = dstaddr;
+
+	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
+
+	/* Remember multicast status before clearing the flags. */
+	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+	m->m_flags &= ~(M_MCAST | M_BCAST);
+
+	error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
+	if (error == 0) {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+		if (mcast != 0)
+			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+	} else
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+	return (error);
+#else
+	m_freem(m);
+	return (ENOTSUP);
+#endif
+}
+
+/*
+ * IPv6 counterpart of vxlan_encap4(): prepend IPv6+UDP+VXLAN headers and
+ * send via ip6_output().  Unlike IPv4, the UDP checksum is mandatory, so
+ * the pseudo-header sum is stored and CSUM_UDP_IPV6 requested so the
+ * stack (or hardware) finishes it.  Returns ENOTSUP without INET6.
+ */
+static int
+vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
+    struct mbuf *m)
+{
+#ifdef INET6
+	struct ifnet *ifp;
+	struct ip6_hdr *ip6;
+	const struct in6_addr *srcaddr, *dstaddr;
+	uint16_t srcport, dstport;
+	int len, mcast, error;
+
+	ifp = sc->vxl_ifp;
+	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
+	srcport = vxlan_pick_source_port(sc, m);
+	dstaddr = &fvxlsa->in6.sin6_addr;
+	dstport = fvxlsa->in6.sin6_port;
+
+	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
+	    M_NOWAIT);
+	if (m == NULL) {
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENOBUFS);
+	}
+
+	len = m->m_pkthdr.len;
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
+	ip6->ip6_vfc = IPV6_VERSION;
+	ip6->ip6_plen = 0;
+	ip6->ip6_nxt = IPPROTO_UDP;
+	ip6->ip6_hlim = sc->vxl_ttl;
+	ip6->ip6_src = *srcaddr;
+	ip6->ip6_dst = *dstaddr;
+
+	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
+
+	/*
+	 * XXX BMV We need support for RFC6935 before we can send and
+	 * receive IPv6 UDP packets with a zero checksum.
+	 */
+	{
+		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
+		hdr->uh_sum = in6_cksum_pseudo(ip6,
+		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
+		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+	}
+
+	/* Remember multicast status before clearing the flags. */
+	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+	m->m_flags &= ~(M_MCAST | M_BCAST);
+
+	error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
+	if (error == 0) {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+		if (mcast != 0)
+			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+	} else
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+	return (error);
+#else
+	m_freem(m);
+	return (ENOTSUP);
+#endif
+}
+
+/*
+ * if_transmit handler: look up the destination endpoint for the inner
+ * frame's MAC address (falling back to the default entry for unknown
+ * unicast and all broadcast/multicast), then encapsulate over IPv4 or
+ * IPv6.  A reference is taken (VXLAN_ACQUIRE) before dropping the read
+ * lock so the softc cannot be torn down while encapsulating.
+ */
+static int
+vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	struct rm_priotracker tracker;
+	union vxlan_sockaddr vxlsa;
+	struct vxlan_softc *sc;
+	struct vxlan_ftable_entry *fe;
+	struct ifnet *mcifp;
+	struct ether_header *eh;
+	int ipv4, error;
+
+	sc = ifp->if_softc;
+	eh = mtod(m, struct ether_header *);
+	fe = NULL;
+	mcifp = NULL;
+
+	ETHER_BPF_MTAP(ifp, m);
+
+	VXLAN_RLOCK(sc, &tracker);
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		VXLAN_RUNLOCK(sc, &tracker);
+		m_freem(m);
+		return (ENETDOWN);
+	}
+
+	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
+	if (fe == NULL)
+		fe = &sc->vxl_default_fe;
+	/* Copy the endpoint out so it survives dropping the lock. */
+	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
+
+	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
+	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
+		mcifp = vxlan_multicast_if_ref(sc, ipv4);
+
+	VXLAN_ACQUIRE(sc);
+	VXLAN_RUNLOCK(sc, &tracker);
+
+	if (ipv4 != 0)
+		error = vxlan_encap4(sc, &vxlsa, m);
+	else
+		error = vxlan_encap6(sc, &vxlsa, m);
+
+	vxlan_release(sc);
+	if (mcifp != NULL)
+		if_rele(mcifp);
+
+	return (error);
+}
+
+/*
+ * if_qflush handler: nothing to do since packets are handed directly to
+ * the IP layer in vxlan_transmit() and never queued on the interface.
+ */
+static void
+vxlan_qflush(struct ifnet *ifp __unused)
+{
+}
+
+/*
+ * UDP tunnel receive callback: validate the VXLAN header of a packet
+ * arriving on a vxlan socket, extract the VNI, strip the encapsulation,
+ * and pass the inner frame to vxlan_input().  'offset' is the offset of
+ * the UDP header within the mbuf.  The mbuf is consumed in all cases.
+ */
+static void
+vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
+    const struct sockaddr *srcsa, void *xvso)
+{
+	struct vxlan_socket *vso;
+	struct vxlan_header *vxh, vxlanhdr;
+	uint32_t vni;
+	int error;
+
+	M_ASSERTPKTHDR(m);
+	vso = xvso;
+	offset += sizeof(struct udphdr);
+
+	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
+		goto out;
+
+	/* Copy the header out if it straddles mbufs in the chain. */
+	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
+		m_copydata(m, offset, sizeof(struct vxlan_header),
+		    (caddr_t) &vxlanhdr);
+		vxh = &vxlanhdr;
+	} else
+		vxh = mtodo(m, offset);
+
+	/*
+	 * Drop if there is a reserved bit set in either the flags or VNI
+	 * fields of the header. This goes against the specification, but
+	 * a bit set may indicate an unsupported new feature. This matches
+	 * the behavior of the Linux implementation.
+	 */
+	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
+	    vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK))
+		goto out;
+
+	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
+	/* Adjust to the start of the inner Ethernet frame. */
+	m_adj(m, offset + sizeof(struct vxlan_header));
+
+	error = vxlan_input(vso, vni, &m, srcsa);
+	MPASS(error != 0 || m == NULL);
+
+out:
+	if (m != NULL)
+		m_freem(m);
+}
+
+/*
+ * Deliver a decapsulated Ethernet frame to the vxlan interface bound to
+ * 'vni' on this socket.  On success the mbuf is consumed (queued to the
+ * Ethernet netisr) and *m0 is cleared; on error the caller frees it.
+ * The softc lookup returns a referenced softc, released before return.
+ */
+static int
+vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
+    const struct sockaddr *sa)
+{
+	struct vxlan_softc *sc;
+	struct ifnet *ifp;
+	struct mbuf *m;
+	struct ether_header *eh;
+	int error;
+
+	sc = vxlan_socket_lookup_softc(vso, vni);
+	if (sc == NULL)
+		return (ENOENT);
+
+	ifp = sc->vxl_ifp;
+	m = *m0;
+	eh = mtod(m, struct ether_header *);
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		error = ENETDOWN;
+		goto out;
+	} else if (ifp == m->m_pkthdr.rcvif) {
+		/* XXX Does not catch more complex loops. */
+		error = EDEADLK;
+		goto out;
+	}
+
+	/* Learn the source MAC -> outer source address mapping. */
+	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
+		vxlan_ftable_update(sc, sa, eh->ether_shost);
+
+	m_clrprotoflags(m);
+	m->m_pkthdr.rcvif = ifp;
+	M_SETFIB(m, ifp->if_fib);
+
+	error = netisr_queue_src(NETISR_ETHER, 0, m);
+	*m0 = NULL;
+
+out:
+	vxlan_release(sc);
+	return (error);
+}
+
+/*
+ * Apply compiled-in defaults to a freshly allocated softc: learning on,
+ * VNI unset (sentinel VXLAN_VNI_MAX), default TTL, the IANA port (or
+ * the legacy Linux port if the per-unit tunable requests it), and the
+ * system ephemeral port range for source ports.
+ */
+static void
+vxlan_set_default_config(struct vxlan_softc *sc)
+{
+
+	sc->vxl_flags |= VXLAN_FLAG_LEARN;
+
+	sc->vxl_vni = VXLAN_VNI_MAX;
+	sc->vxl_ttl = IPDEFTTL;
+
+	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
+		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
+		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
+	} else {
+		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
+		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
+	}
+
+	sc->vxl_min_port = V_ipport_firstauto;
+	sc->vxl_max_port = V_ipport_lastauto;
+
+	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
+	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
+}
+
+/*
+ * Overlay user-supplied creation parameters (SIOCIFCREATE2) on top of
+ * the defaults.  Each field is applied only if its WITH bit is set;
+ * out-of-range values are silently ignored rather than rejected, except
+ * for address families the kernel was not built with.
+ */
+static int
+vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
+{
+
+#ifndef INET
+	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
+	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
+		return (EAFNOSUPPORT);
+#endif
+
+#ifndef INET6
+	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
+	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
+		return (EAFNOSUPPORT);
+#endif
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
+		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
+			sc->vxl_vni = vxlp->vxlp_vni;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
+		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
+		sc->vxl_src_addr.in4.sin_family = AF_INET;
+		sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_in4;
+	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
+		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
+		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
+		sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_in6;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
+		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
+		sc->vxl_dst_addr.in4.sin_family = AF_INET;
+		sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_in4;
+	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
+		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
+		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
+		sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_in6;
+	}
+
+	/* Ports live at the same offset for both families; in4 suffices. */
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
+		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
+		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
+		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
+			sc->vxl_min_port = vxlp->vxlp_min_port;
+			sc->vxl_max_port = vxlp->vxlp_max_port;
+		}
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
+		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
+		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
+			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
+		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
+			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
+		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
+			sc->vxl_ttl = vxlp->vxlp_ttl;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
+		if (vxlp->vxlp_learn == 0)
+			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
+	}
+
+	return (0);
+}
+
+/*
+ * Cloner create handler: allocate and configure a softc (defaults, then
+ * optional user parameters copied in from 'params'), allocate the ifnet,
+ * initialize the lock, callout, forwarding table and sysctl tree, and
+ * attach as an Ethernet interface with a randomly generated MAC.
+ */
+static int
+vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct vxlan_softc *sc;
+	struct ifnet *ifp;
+	struct ifvxlanparam vxlp;
+	int error;
+
+	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
+	sc->vxl_unit = unit;
+	vxlan_set_default_config(sc);
+
+	if (params != 0) {
+		error = copyin(params, &vxlp, sizeof(vxlp));
+		if (error)
+			goto fail;
+
+		error = vxlan_set_user_config(sc, &vxlp);
+		if (error)
+			goto fail;
+	}
+
+	ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		error = ENOSPC;
+		goto fail;
+	}
+
+	sc->vxl_ifp = ifp;
+	rm_init(&sc->vxl_lock, "vxlanrm");
+	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
+	sc->vxl_port_hash_key = arc4random();
+	vxlan_ftable_init(sc);
+
+	vxlan_sysctl_setup(sc);
+
+	ifp->if_softc = sc;
+	if_initname(ifp, vxlan_name, unit);
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_init = vxlan_init;
+	ifp->if_ioctl = vxlan_ioctl;
+	ifp->if_transmit = vxlan_transmit;
+	ifp->if_qflush = vxlan_qflush;
+
+	vxlan_fakeaddr(sc);
+	ether_ifattach(ifp, sc->vxl_hwaddr);
+
+	/* ether_ifattach() set Ethernet values; this is a tunnel device. */
+	ifp->if_baudrate = 0;
+	ifp->if_hdrlen = 0;
+
+	return (0);
+
+fail:
+	free(sc, M_VXLAN);
+	return (error);
+}
+
+/*
+ * Cloner destroy handler: tear down the tunnel state, flush all
+ * forwarding entries (including static ones), detach the interface,
+ * and release all resources in the reverse order of creation.
+ */
+static void
+vxlan_clone_destroy(struct ifnet *ifp)
+{
+	struct vxlan_softc *sc;
+
+	sc = ifp->if_softc;
+
+	vxlan_teardown(sc);
+
+	vxlan_ftable_flush(sc, 1);
+
+	ether_ifdetach(ifp);
+	if_free(ifp);
+
+	vxlan_ftable_fini(sc);
+
+	vxlan_sysctl_destroy(sc);
+	rm_destroy(&sc->vxl_lock);
+	free(sc, M_VXLAN);
+}
+
+/* BMV: Taken from if_bridge. */
+/*
+ * Hash a 48-bit MAC address into a 32-bit forwarding table bucket index,
+ * seeded with the per-softc vxl_ftable_hash_key.  The six address bytes
+ * are folded into two 32-bit words and run through one round of the
+ * Jenkins mix below.
+ */
+static uint32_t
+vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
+{
+	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
+
+	b += addr[5] << 8;
+	b += addr[4];
+	a += addr[3] << 24;
+	a += addr[2] << 16;
+	a += addr[1] << 8;
+	a += addr[0];
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ */
+#define	mix(a, b, c)							\
+do {									\
+	a -= b; a -= c; a ^= (c >> 13);					\
+	b -= c; b -= a; b ^= (a << 8);					\
+	c -= a; c -= b; c ^= (b >> 13);					\
+	a -= b; a -= c; a ^= (c >> 12);					\
+	b -= c; b -= a; b ^= (a << 16);					\
+	c -= a; c -= b; c ^= (b >> 5);					\
+	a -= b; a -= c; a ^= (c >> 3);					\
+	b -= c; b -= a; b ^= (a << 10);					\
+	c -= a; c -= b; c ^= (b >> 15);					\
+} while (0)
+
+	mix(a, b, c);
+
+#undef mix
+
+	return (c);
+}
+
+/*
+ * Generate a random Ethernet address for the interface: clear the
+ * group (multicast) bit and set the locally administered bit so the
+ * result is a valid unicast, locally assigned MAC.
+ */
+static void
+vxlan_fakeaddr(struct vxlan_softc *sc)
+{
+
+	arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1);
+	sc->vxl_hwaddr[0] = (sc->vxl_hwaddr[0] & ~0x01) | 0x02;
+}
+
+/*
+ * Compare a vxlan sockaddr union against a generic sockaddr; returns
+ * zero when the first sa_len bytes match.  Assumes 'sa' is at least
+ * vxladdr->sa.sa_len bytes long -- callers must guarantee this.
+ */
+static int
+vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+
+	return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
+}
+
+/*
+ * Copy an entire IPv4/IPv6 sockaddr (address, port, family) into the
+ * union, zeroing it first and normalizing the length field.
+ */
+static void
+vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+
+	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+	bzero(vxladdr, sizeof(*vxladdr));
+
+	if (sa->sa_family == AF_INET) {
+		vxladdr->in4 = *satoconstsin(sa);
+		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
+	} else if (sa->sa_family == AF_INET6) {
+		vxladdr->in6 = *satoconstsin6(sa);
+		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
+	}
+}
+
+/*
+ * Return nonzero when the address portion of 'sa' matches the address
+ * stored in the union.  Ports and other fields are not compared; an
+ * unknown address family never matches.
+ */
+static int
+vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		return (satoconstsin(sa)->sin_addr.s_addr ==
+		    vxladdr->in4.sin_addr.s_addr);
+	case AF_INET6:
+		return (IN6_ARE_ADDR_EQUAL(&satoconstsin6(sa)->sin6_addr,
+		    &vxladdr->in6.sin6_addr));
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Copy only the address portion of 'sa' into the union, setting family
+ * and length but leaving the existing port untouched.  Used by the
+ * SET_LOCAL_ADDR/SET_REMOTE_ADDR controls.
+ */
+static void
+vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+
+	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+
+	if (sa->sa_family == AF_INET) {
+		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
+		vxladdr->in4.sin_family = AF_INET;
+		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
+		vxladdr->in4.sin_addr = *in4;
+	} else if (sa->sa_family == AF_INET6) {
+		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
+		vxladdr->in6.sin6_family = AF_INET6;
+		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
+		vxladdr->in6.sin6_addr = *in6;
+	}
+}
+
+/*
+ * Return nonzero when the address family of the union is one the kernel
+ * was compiled to support.  AF_UNSPEC is accepted only when the caller
+ * passes a nonzero 'unspec'.
+ */
+static int
+vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
+{
+	sa_family_t af;
+
+	af = vxladdr->sa.sa_family;
+
+	if (af == AF_UNSPEC)
+		return (unspec != 0);
+#ifdef INET
+	if (af == AF_INET)
+		return (1);
+#endif
+#ifdef INET6
+	if (af == AF_INET6)
+		return (1);
+#endif
+	return (0);
+}
+
+/*
+ * Return nonzero when the stored address is the wildcard (INADDR_ANY or
+ * the IPv6 unspecified address).  Returns -1 -- also "true" to callers
+ * testing != 0 -- for any other address family.
+ */
+static int
+vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
+{
+	const struct sockaddr *sa;
+
+	sa = &vxladdr->sa;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		return (satoconstsin(sa)->sin_addr.s_addr == INADDR_ANY);
+	case AF_INET6:
+		return (IN6_IS_ADDR_UNSPECIFIED(
+		    &satoconstsin6(sa)->sin6_addr));
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * Return nonzero when the stored address is an IPv4 or IPv6 multicast
+ * address.  Returns -1 -- also "true" to callers testing != 0 -- for
+ * any other address family.
+ */
+static int
+vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
+{
+	const struct sockaddr *sa;
+
+	sa = &vxladdr->sa;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		return (IN_MULTICAST(ntohl(satoconstsin(sa)->sin_addr.s_addr)));
+	case AF_INET6:
+		return (IN6_IS_ADDR_MULTICAST(&satoconstsin6(sa)->sin6_addr));
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * Return nonzero when configuration may be changed: the interface must
+ * not be running nor in the middle of an init or teardown.  Caller must
+ * hold the softc lock.
+ */
+static int
+vxlan_can_change_config(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+
+	ifp = sc->vxl_ifp;
+	VXLAN_LOCK_ASSERT(sc);
+
+	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+		return (0);
+	if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Validate a VNI: it must fit in 24 bits.  Returns nonzero on error.
+ */
+static int
+vxlan_check_vni(uint32_t vni)
+{
+
+	if (vni >= VXLAN_VNI_MAX)
+		return (1);
+	return (0);
+}
+
+/*
+ * Validate a TTL/hop limit value.  Returns nonzero on error.
+ */
+static int
+vxlan_check_ttl(int ttl)
+{
+
+	if (ttl > MAXTTL)
+		return (1);
+	return (0);
+}
+
+/*
+ * Validate a forwarding table entry timeout.  Returns nonzero on error.
+ */
+static int
+vxlan_check_ftable_timeout(uint32_t timeout)
+{
+
+	if (timeout > VXLAN_FTABLE_MAX_TIMEOUT)
+		return (1);
+	return (0);
+}
+
+/*
+ * Validate a forwarding table size limit.  Returns nonzero on error.
+ */
+static int
+vxlan_check_ftable_max(uint32_t max)
+{
+
+	if (max > VXLAN_FTABLE_MAX)
+		return (1);
+	return (0);
+}
+
+/*
+ * Create the per-interface sysctl tree under net.link.vxlan.<unit>:
+ * forwarding table counters and limits, a table dump proc, and the
+ * statistics node.  Torn down by vxlan_sysctl_destroy().
+ */
+static void
+vxlan_sysctl_setup(struct vxlan_softc *sc)
+{
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *node;
+	struct vxlan_statistics *stats;
+	char namebuf[8];
+
+	ctx = &sc->vxl_sysctl_ctx;
+	stats = &sc->vxl_stats;
+	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
+
+	sysctl_ctx_init(ctx);
+	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
+	    CTLFLAG_RD, NULL, "");
+
+	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
+	    OID_AUTO, "ftable", CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
+	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
+	    "Number of entries in forwarding table");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
+	    CTLFLAG_RD, &sc->vxl_ftable_max, 0,
+	    "Maximum number of entries allowed in forwarding table");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
+	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
+	    "Number of seconds between prunes of the forwarding table");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
+	    sc, 0, vxlan_ftable_sysctl_dump, "A",
+	    "Dump the forwarding table entries");
+
+	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
+	    OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
+	    "Forwarding table reached maximum entries");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
+	    &stats->ftable_lock_upgrade_failed, 0,
+	    "Forwarding table update required lock upgrade");
+}
+
+/*
+ * Free the sysctl context created by vxlan_sysctl_setup(); this removes
+ * every OID that was registered through it.
+ */
+static void
+vxlan_sysctl_destroy(struct vxlan_softc *sc)
+{
+
+	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
+	sc->vxl_sysctl_node = NULL;
+}
+
+/*
+ * Fetch the per-unit loader tunable net.link.vxlan.<unit>.<knob>,
+ * returning 'def' unchanged when the tunable is not set.
+ */
+static int
+vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
+{
+	char oidname[64];
+
+	snprintf(oidname, sizeof(oidname), "net.link.vxlan.%d.%s",
+	    sc->vxl_unit, knob);
+	TUNABLE_INT_FETCH(oidname, &def);
+	return (def);
+}
+
+/*
+ * ifnet_departure_event handler: when a multicast-capable interface goes
+ * away, collect every vxlan softc that referenced it (under the global
+ * list lock), then tear each one down outside that lock.  Renames are
+ * ignored since the ifnet itself survives.
+ */
+static void
+vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
+{
+	struct vxlan_softc_head list;
+	struct vxlan_socket *vso;
+	struct vxlan_softc *sc, *tsc;
+
+	LIST_INIT(&list);
+
+	if (ifp->if_flags & IFF_RENAMING)
+		return;
+	if ((ifp->if_flags & IFF_MULTICAST) == 0)
+		return;
+
+	mtx_lock(&vxlan_list_mtx);
+	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
+		vxlan_socket_ifdetach(vso, ifp, &list);
+	mtx_unlock(&vxlan_list_mtx);
+
+	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
+		LIST_REMOVE(sc, vxl_ifdetach_list);
+
+		VXLAN_WLOCK(sc);
+		/* Wait for any in-progress init before tearing down. */
+		if (sc->vxl_flags & VXLAN_FLAG_INIT)
+			vxlan_init_wait(sc);
+		vxlan_teardown_locked(sc);
+	}
+}
+
+/*
+ * Module load: initialize global state, register the interface
+ * departure event handler, and attach the "vxlan" cloner.
+ */
+static void
+vxlan_load(void)
+{
+
+	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
+	LIST_INIT(&vxlan_socket_list);
+	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
+	vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
+	    vxlan_clone_destroy, 0);
+}
+
+/*
+ * Module unload: undo vxlan_load().  Detaching the cloner destroys all
+ * remaining interfaces, so the socket list must be empty afterwards.
+ */
+static void
+vxlan_unload(void)
+{
+
+	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+	    vxlan_ifdetach_event_tag);
+	if_clone_detach(vxlan_cloner);
+	mtx_destroy(&vxlan_list_mtx);
+	MPASS(LIST_EMPTY(&vxlan_socket_list));
+}
+
+/*
+ * Module event handler: dispatch load/unload; other events are not
+ * supported.
+ */
+static int
+vxlan_modevent(module_t mod, int type, void *unused)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		vxlan_load();
+		return (0);
+	case MOD_UNLOAD:
+		vxlan_unload();
+		return (0);
+	default:
+		return (ENOTSUP);
+	}
+}
+
+/* Module glue: register if_vxlan as a pseudo-device module, version 1. */
+static moduledata_t vxlan_mod = {
+	"if_vxlan",
+	vxlan_modevent,
+	0
+};
+
+DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_vxlan, 1);


Property changes on: trunk/sys/net/if_vxlan.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/if_vxlan.h
===================================================================
--- trunk/sys/net/if_vxlan.h	                        (rev 0)
+++ trunk/sys/net/if_vxlan.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,149 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014, Bryan Venteicher <bryanv at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/if_vxlan.h 284365 2015-06-14 03:14:45Z bryanv $
+ */
+
+#ifndef _NET_IF_VXLAN_H_
+#define _NET_IF_VXLAN_H_
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+
/*
 * VXLAN encapsulation header.  vxlh_flags carries
 * VXLAN_HDR_FLAGS_VALID_VNI when a VNI is present; vxlh_vni holds the
 * 24-bit VNI shifted left by VXLAN_HDR_VNI_SHIFT (see below).
 */
struct vxlan_header {
	uint32_t	vxlh_flags;
	uint32_t	vxlh_vni;
};
+
+#define VXLAN_HDR_FLAGS_VALID_VNI	0x08000000
+#define VXLAN_HDR_VNI_SHIFT		8
+
+#define VXLAN_VNI_MAX	(1 << 24)
+#define VXLAN_VNI_MASK	(VXLAN_VNI_MAX - 1)
+
+/*
+ * The port assigned by IANA is 4789, but some early implementations
+ * (like Linux) use 8472 instead. If not specified, we default to
+ * the IANA port.
+ */
+#define VXLAN_PORT		4789
+#define VXLAN_LEGACY_PORT	8472
+
/*
 * Creation-time parameters for a vxlan interface.  vxlp_with is a
 * bitmask of VXLAN_PARAM_WITH_* flags telling which of the following
 * fields the caller filled in; unset fields take driver defaults.
 */
struct ifvxlanparam {
	uint64_t		vxlp_with;

#define VXLAN_PARAM_WITH_VNI		0x0001
#define VXLAN_PARAM_WITH_LOCAL_ADDR4	0x0002
#define VXLAN_PARAM_WITH_LOCAL_ADDR6	0x0004
#define VXLAN_PARAM_WITH_REMOTE_ADDR4	0x0008
#define VXLAN_PARAM_WITH_REMOTE_ADDR6	0x0010
#define VXLAN_PARAM_WITH_LOCAL_PORT	0x0020
#define VXLAN_PARAM_WITH_REMOTE_PORT	0x0040
#define VXLAN_PARAM_WITH_PORT_RANGE	0x0080
#define VXLAN_PARAM_WITH_FTABLE_TIMEOUT	0x0100
#define VXLAN_PARAM_WITH_FTABLE_MAX	0x0200
#define VXLAN_PARAM_WITH_MULTICAST_IF	0x0400
#define VXLAN_PARAM_WITH_TTL		0x0800
#define VXLAN_PARAM_WITH_LEARN		0x1000

	uint32_t		vxlp_vni;
	struct in_addr		vxlp_local_in4;
	struct in6_addr		vxlp_local_in6;
	struct in_addr		vxlp_remote_in4;
	struct in6_addr		vxlp_remote_in6;
	uint16_t		vxlp_local_port;
	uint16_t		vxlp_remote_port;
	uint16_t		vxlp_min_port;
	uint16_t		vxlp_max_port;
	char			vxlp_mc_ifname[IFNAMSIZ];
	uint32_t		vxlp_ftable_timeout;
	uint32_t		vxlp_ftable_max;
	uint8_t			vxlp_ttl;
	uint8_t			vxlp_learn;
};
+
/*
 * Convenience union so tunnel endpoints can be handled generically;
 * sa.sa_family discriminates between the IPv4 and IPv6 forms.
 */
union vxlan_sockaddr {
	struct sockaddr		sa;
	struct sockaddr_in	in4;
	struct sockaddr_in6	in6;
};
+
+#define VXLAN_SOCKADDR_IS_IPV4(_vxsin)	((_vxsin)->sa.sa_family == AF_INET)
+#define VXLAN_SOCKADDR_IS_IPV6(_vxsin)	((_vxsin)->sa.sa_family == AF_INET6)
+#define VXLAN_SOCKADDR_IS_IPV46(_vxsin) \
+    (VXLAN_SOCKADDR_IS_IPV4(_vxsin) || VXLAN_SOCKADDR_IS_IPV6(_vxsin))
+
+#define VXLAN_CMD_GET_CONFIG		0
+#define VXLAN_CMD_SET_VNI		1
+#define VXLAN_CMD_SET_LOCAL_ADDR	2
+#define VXLAN_CMD_SET_REMOTE_ADDR	4
+#define VXLAN_CMD_SET_LOCAL_PORT	5
+#define VXLAN_CMD_SET_REMOTE_PORT	6
+#define VXLAN_CMD_SET_PORT_RANGE	7
+#define VXLAN_CMD_SET_FTABLE_TIMEOUT	8
+#define VXLAN_CMD_SET_FTABLE_MAX	9
+#define VXLAN_CMD_SET_MULTICAST_IF	10
+#define VXLAN_CMD_SET_TTL		11
+#define VXLAN_CMD_SET_LEARN		12
+#define VXLAN_CMD_FTABLE_ENTRY_ADD	13
+#define VXLAN_CMD_FTABLE_ENTRY_REM	14
+#define VXLAN_CMD_FLUSH			15
+
/*
 * Snapshot of a vxlan interface's current configuration, filled in for
 * the VXLAN_CMD_GET_CONFIG request.
 */
struct ifvxlancfg {
	uint32_t		vxlc_vni;
	union vxlan_sockaddr	vxlc_local_sa;
	union vxlan_sockaddr	vxlc_remote_sa;
	uint32_t		vxlc_mc_ifindex;
	uint32_t		vxlc_ftable_cnt;	/* current forwarding entries */
	uint32_t		vxlc_ftable_max;	/* forwarding table capacity */
	uint32_t		vxlc_ftable_timeout;
	uint16_t		vxlc_port_min;
	uint16_t		vxlc_port_max;
	uint8_t			vxlc_learn;
	uint8_t			vxlc_ttl;
};
+
/*
 * Argument block for the VXLAN_CMD_* requests above; which fields are
 * meaningful depends on the particular command.
 */
struct ifvxlancmd {
	uint32_t		vxlcmd_flags;
#define VXLAN_CMD_FLAG_FLUSH_ALL	0x0001
#define VXLAN_CMD_FLAG_LEARN		0x0002

	uint32_t		vxlcmd_vni;
	uint32_t		vxlcmd_ftable_timeout;
	uint32_t		vxlcmd_ftable_max;
	uint16_t		vxlcmd_port;
	uint16_t		vxlcmd_port_min;
	uint16_t		vxlcmd_port_max;
	uint8_t			vxlcmd_mac[ETHER_ADDR_LEN];
	uint8_t			vxlcmd_ttl;
	union vxlan_sockaddr	vxlcmd_sa;
	char			vxlcmd_ifname[IFNAMSIZ];
};
+
+#endif /* _NET_IF_VXLAN_H_ */


Property changes on: trunk/sys/net/if_vxlan.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/iso88025.h
===================================================================
--- trunk/sys/net/iso88025.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/iso88025.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -31,7 +31,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/iso88025.h 194581 2009-06-21 10:29:31Z rdivacky $
+ * $FreeBSD: stable/10/sys/net/iso88025.h 264299 2014-04-09 11:15:50Z glebius $
  *
  * Information gathered from tokenring at freebsd, /sys/net/ethernet.h and
  * the Mach token ring driver.
@@ -163,11 +163,13 @@
 #define	ISO88025_BPF_UNSUPPORTED	0
 #define	ISO88025_BPF_SUPPORTED		1
 
+#ifdef _KERNEL
 void	iso88025_ifattach	(struct ifnet *, const u_int8_t *, int);
 void	iso88025_ifdetach	(struct ifnet *, int);
 int	iso88025_ioctl		(struct ifnet *, u_long, caddr_t );
-int	iso88025_output		(struct ifnet *, struct mbuf *, struct sockaddr *,
-    				 struct route *);
+int	iso88025_output		(struct ifnet *, struct mbuf *,
+				 const struct sockaddr *, struct route *);
 void	iso88025_input		(struct ifnet *, struct mbuf *);
+#endif	/* _KERNEL */
 
-#endif
+#endif	/* !_NET_ISO88025_H_ */

Added: trunk/sys/net/mppc.h
===================================================================
--- trunk/sys/net/mppc.h	                        (rev 0)
+++ trunk/sys/net/mppc.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,63 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2007 Alexander Motin <mav at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/mppc.h 302061 2016-06-21 15:47:54Z pfg $
+ */
+
/*
 * MPPC compression/decompression library.
 * Version 1.0
 *
 * Note that Hi/Fn (later acquired by Exar Corporation) held US patents
 * on some implementation-critical aspects of MPPC compression.
 * These patents lapsed due to non-payment of fees in 2007 and by 2015
 * expired altogether.
 */
+
#ifndef _NET_MPPC_H_
#define	_NET_MPPC_H_

#define	MPPC_MANDATORY_COMPRESS_FLAGS 0
#define	MPPC_MANDATORY_DECOMPRESS_FLAGS 0

/* Flag for MPPC_Compress(): keep the history across calls. */
#define	MPPC_SAVE_HISTORY 1

/* Status bits returned by MPPC_Compress()/MPPC_Decompress(). */
#define	MPPC_OK 5
#define	MPPC_EXPANDED 8
#define	MPPC_RESTART_HISTORY 16
#define	MPPC_DEST_EXHAUSTED 32

/* Bytes the caller must allocate for each (opaque) history buffer. */
extern size_t MPPC_SizeOfCompressionHistory(void);
extern size_t MPPC_SizeOfDecompressionHistory(void);

extern void MPPC_InitCompressionHistory(char *history);
extern void MPPC_InitDecompressionHistory(char *history);

extern int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef);
extern int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags);

#endif


Property changes on: trunk/sys/net/mppc.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/mppcc.c
===================================================================
--- trunk/sys/net/mppcc.c	                        (rev 0)
+++ trunk/sys/net/mppcc.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,300 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2002-2004 Jan Dubiec <jdx at slackware.pl>
+ * Copyright (c) 2007 Alexander Motin <mav at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/mppcc.c 302774 2016-07-13 16:20:18Z pfg $
+ */
+
/*
 * MPPC compression library (compressor side; the decompressor lives in
 * mppcd.c).
 * Version 1.0
 *
 * Note that Hi/Fn (later acquired by Exar Corporation) held US patents
 * on some implementation-critical aspects of MPPC compression.
 * These patents lapsed due to non-payment of fees in 2007 and by 2015
 * expired altogether.
 */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <net/mppc.h>
+
+#define	MPPE_HIST_LEN          8192
+
+#define	HASH(x)		(((40543*(((((x)[0]<<4)^(x)[1])<<4)^(x)[2]))>>4) & 0x1fff)
+
+struct MPPC_comp_state {
+    uint8_t	hist[2*MPPE_HIST_LEN];
+    uint16_t	histptr;
+    uint16_t	hash[MPPE_HIST_LEN];
+};
+
/*
 * Insert 1 to 8 bits into the output buffer.  *i is the octet index of
 * the current output position and *l the number of still-free bits in
 * that octet; both are advanced as bits are emitted.
 */
static void __inline
putbits8(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l)
{
    uint8_t *p = buf + *i;		/* current output octet */

    if (n <= *l) {
	/* The field fits in the free bits of the current octet. */
	*l -= n;
	p[0] |= (uint8_t)((val << *l) & 0xff);
	if (*l == 0) {
	    /* Octet filled exactly; open a fresh, zeroed one. */
	    *l = 8;
	    (*i)++;
	    p[1] = 0;
	}
    } else {
	/* The field spills into the next octet. */
	(*i)++;
	*l += 8 - n;
	val <<= *l;
	p[0] |= (uint8_t)((val >> 8) & 0xff);
	p[1] = (uint8_t)(val & 0xff);
    }
}
+
/*
 * Inserts 9 to 16 bits into the output buffer.  *i is the octet index
 * of the current output position and *l the number of still-free bits
 * in that octet; both are advanced as bits are emitted.
 */
static void __inline
putbits16(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l)
{
    buf += *i;
    if (*l >= n - 8) {
	/* The value ends in the octet following the current one. */
	(*i)++;
	*l = 8 - n + (*l);
	val <<= *l;
	*buf = *buf | ((val >> 8) & 0xff);
	*(++buf) = val & 0xff;
	if (*l == 0) {
	    /* Second octet filled exactly; open a fresh, zeroed one. */
	    *l = 8;
	    (*i)++;
	    *(++buf) = 0;
	}
    } else {
	/* The value spans three octets. */
	(*i)++; (*i)++;
	*l = 16 - n + (*l);
	val <<= *l;
	*buf = *buf | ((val >> 16) & 0xff);
	*(++buf) = (val >> 8) & 0xff;
	*(++buf) = val & 0xff;
    }
}
+
/*
 * Inserts 17 to 24 bits into the output buffer.  Same conventions as
 * putbits8()/putbits16(): *i is the octet index of the current output
 * position, *l the number of still-free bits in that octet.
 */
static void __inline
putbits24(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l)
{
    buf += *i;
    if (*l >= n - 16) {
	/* The value ends two octets past the current one. */
	(*i)++; (*i)++;
	*l = 16 - n + (*l);
	val <<= *l;
	*buf = *buf | ((val >> 16) & 0xff);
	*(++buf) = (val >> 8) & 0xff;
	*(++buf) = val & 0xff;
	if (*l == 0) {
	    /* Third octet filled exactly; open a fresh, zeroed one. */
	    *l = 8;
	    (*i)++;
	    *(++buf) = 0;
	}
    } else {
	/* The value spans four octets. */
	(*i)++; (*i)++; (*i)++;
	*l = 24 - n + (*l);
	val <<= *l;
	*buf = *buf | ((val >> 24) & 0xff);
	*(++buf) = (val >> 16) & 0xff;
	*(++buf) = (val >> 8) & 0xff;
	*(++buf) = val & 0xff;
    }
}
+
/* Bytes the caller must allocate for a compression history buffer. */
size_t MPPC_SizeOfCompressionHistory(void)
{
    return (sizeof(struct MPPC_comp_state));
}
+
/*
 * Reset a compression history.  Writing starts at the second half of
 * the double-length buffer; the first half holds the previous window
 * after a MPPC_RESTART_HISTORY (see MPPC_Compress()).
 */
void MPPC_InitCompressionHistory(char *history)
{
    struct MPPC_comp_state      *state = (struct MPPC_comp_state*)history;

    bzero(history, sizeof(struct MPPC_comp_state));
    state->histptr = MPPE_HIST_LEN;
}
+
/*
 * Compress *srcCnt bytes from *src into *dst using the sliding-window
 * history in 'history' (MPPC literal/copy-token encoding).  On return
 * *src and *dst are advanced, *srcCnt is zeroed and *dstCnt reduced by
 * the output length.  'undef' is unused here.  Returns a bit mask:
 * MPPC_OK is cleared on hard failure, MPPC_EXPANDED is set when the
 * output came out larger than the input, MPPC_RESTART_HISTORY when the
 * peer must restart its history window.
 */
int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef)
{
    struct MPPC_comp_state	*state = (struct MPPC_comp_state*)history;
    uint32_t olen, off, len, idx, i, l;
    uint8_t *hist, *sbuf, *p, *q, *r, *s;
    int	rtn = MPPC_OK;

   /*
    * At this point, to avoid possible buffer overflow caused by packet
    * expansion during/after compression, we should make sure we have
    * space for the worst case.
    *
    * Maximum MPPC packet expansion is 12.5%. This is the worst case when
    * all octets in the input buffer are >= 0x80 and we cannot find any
    * repeated tokens.
    */
    if (*dstCnt < (*srcCnt * 9 / 8 + 2)) {
	rtn &= ~MPPC_OK;
	return (rtn);
    }

    /* We can't compress more than MPPE_HIST_LEN bytes in a call. */
    if (*srcCnt > MPPE_HIST_LEN) {
	rtn &= ~MPPC_OK;
	return (rtn);
    }

    hist = state->hist + MPPE_HIST_LEN;
    /* check if there is enough room at the end of the history */
    if (state->histptr + *srcCnt >= 2*MPPE_HIST_LEN) {
	rtn |= MPPC_RESTART_HISTORY;
	state->histptr = MPPE_HIST_LEN;
	memcpy(state->hist, hist, MPPE_HIST_LEN);
    }
    /* Add packet to the history. */
    sbuf = state->hist + state->histptr;
    memcpy(sbuf, *src, *srcCnt);
    state->histptr += *srcCnt;

    /* compress data */
    r = sbuf + *srcCnt;
    /* First output octet must start zeroed: putbits*() OR bits into it. */
    **dst = olen = i = 0;
    l = 8;
    /*
     * Leave the last two input bytes for the literal path below, since a
     * match needs at least 3 bytes.
     * NOTE(review): *srcCnt is unsigned, so for inputs shorter than 2
     * bytes "*srcCnt - 2" wraps around; looks like callers never pass
     * such runts — confirm.
     */
    while (i < *srcCnt - 2) {
	s = q = sbuf + i;

	/* Prognose matching position using hash function. */
	idx = HASH(s);
	p = hist + state->hash[idx];
	state->hash[idx] = (uint16_t) (s - hist);
	if (p > s)	/* It was before MPPC_RESTART_HISTORY. */
	    p -= MPPE_HIST_LEN;	/* Try previous history buffer. */
	off = s - p;

	/* Check our prognosis. */
	if (off > MPPE_HIST_LEN - 1 || off < 1 || *p++ != *s++ ||
	    *p++ != *s++ || *p++ != *s++) {
	    /* No match found; encode literal byte. */
	    if ((*src)[i] < 0x80) {		/* literal byte < 0x80 */
		putbits8(*dst, (uint32_t) (*src)[i], 8, &olen, &l);
	    } else {				/* literal byte >= 0x80 */
		putbits16(*dst, (uint32_t) (0x100|((*src)[i]&0x7f)), 9,
		    &olen, &l);
	    }
	    ++i;
	    continue;
	}

	/* Find length of the matching fragment */
#if defined(__amd64__) || defined(__i386__)
	/* Optimization for CPUs without strict data aligning requirements */
	while ((*((uint32_t*)p) == *((uint32_t*)s)) && (s < (r - 3))) {
	    p+=4;
	    s+=4;
	}
#endif
	while((*p++ == *s++) && (s <= r));
	len = s - q - 1;
	i += len;

	/* At least 3 character match found; code data. */
	/* Encode offset. */
	if (off < 64) {			/* 10-bit offset; 0 <= offset < 64 */
	    putbits16(*dst, 0x3c0|off, 10, &olen, &l);
	} else if (off < 320) {		/* 12-bit offset; 64 <= offset < 320 */
	    putbits16(*dst, 0xe00|(off-64), 12, &olen, &l);
	} else if (off < 8192) {	/* 16-bit offset; 320 <= offset < 8192 */
	    putbits16(*dst, 0xc000|(off-320), 16, &olen, &l);
	} else {		/* NOTREACHED */
	    rtn &= ~MPPC_OK;
	    return (rtn);
	}

	/* Encode length of match. */
	if (len < 4) {			/* length = 3 */
	    putbits8(*dst, 0, 1, &olen, &l);
	} else if (len < 8) {		/* 4 <= length < 8 */
	    putbits8(*dst, 0x08|(len&0x03), 4, &olen, &l);
	} else if (len < 16) {		/* 8 <= length < 16 */
	    putbits8(*dst, 0x30|(len&0x07), 6, &olen, &l);
	} else if (len < 32) {		/* 16 <= length < 32 */
	    putbits8(*dst, 0xe0|(len&0x0f), 8, &olen, &l);
	} else if (len < 64) {		/* 32 <= length < 64 */
	    putbits16(*dst, 0x3c0|(len&0x1f), 10, &olen, &l);
	} else if (len < 128) {		/* 64 <= length < 128 */
	    putbits16(*dst, 0xf80|(len&0x3f), 12, &olen, &l);
	} else if (len < 256) {		/* 128 <= length < 256 */
	    putbits16(*dst, 0x3f00|(len&0x7f), 14, &olen, &l);
	} else if (len < 512) {		/* 256 <= length < 512 */
	    putbits16(*dst, 0xfe00|(len&0xff), 16, &olen, &l);
	} else if (len < 1024) {	/* 512 <= length < 1024 */
	    putbits24(*dst, 0x3fc00|(len&0x1ff), 18, &olen, &l);
	} else if (len < 2048) {	/* 1024 <= length < 2048 */
	    putbits24(*dst, 0xff800|(len&0x3ff), 20, &olen, &l);
	} else if (len < 4096) {	/* 2048 <= length < 4096 */
	    putbits24(*dst, 0x3ff000|(len&0x7ff), 22, &olen, &l);
	} else if (len < 8192) {	/* 4096 <= length < 8192 */
	    putbits24(*dst, 0xffe000|(len&0xfff), 24, &olen, &l);
	} else {	/* NOTREACHED */
	    rtn &= ~MPPC_OK;
	    return (rtn);
	}
    }

    /* Add remaining octets to the output. */
    while(*srcCnt - i > 0) {
	if ((*src)[i] < 0x80) {	/* literal byte < 0x80 */
	    putbits8(*dst, (uint32_t) (*src)[i++], 8, &olen, &l);
	} else {		/* literal byte >= 0x80 */
	    putbits16(*dst, (uint32_t) (0x100|((*src)[i++]&0x7f)), 9, &olen,
	        &l);
	}
    }

    /* Reset unused bits of the last output octet. */
    if ((l != 0) && (l != 8)) {
	putbits8(*dst, 0, l, &olen, &l);
    }

    /* If result is bigger than original, set flag and flush history. */
    if ((*srcCnt < olen) || ((flags & MPPC_SAVE_HISTORY) == 0)) {
	if (*srcCnt < olen)
	    rtn |= MPPC_EXPANDED;
	bzero(history, sizeof(struct MPPC_comp_state));
	state->histptr = MPPE_HIST_LEN;
    }

    /* Advance the caller's cursors past the consumed/produced data. */
    *src += *srcCnt;
    *srcCnt = 0;
    *dst += olen;
    *dstCnt -= olen;

    return (rtn);
}


Property changes on: trunk/sys/net/mppcc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/mppcd.c
===================================================================
--- trunk/sys/net/mppcd.c	                        (rev 0)
+++ trunk/sys/net/mppcd.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,285 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2002-2004 Jan Dubiec <jdx at slackware.pl>
+ * Copyright (c) 2007 Alexander Motin <mav at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/mppcd.c 302774 2016-07-13 16:20:18Z pfg $
+ */
+
+/*
+ * MPPC decompression library.
+ * Version 1.0
+ *
+ * Note that Hi/Fn (later acquired by Exar Corporation) held US patents
+ * on some implementation-critical aspects of MPPC compression.
+ * These patents lapsed due to non-payment of fees in 2007 and by 2015
+ * expired altogether.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <net/mppc.h>
+
+#define	MPPE_HIST_LEN          8192
+
+struct MPPC_decomp_state {
+    uint8_t	hist[2*MPPE_HIST_LEN];
+    uint16_t	histptr;
+};
+
/*
 * Extract the next n (1..8) bits from the input stream.  *i is the
 * octet index of the read position and *l the number of bits still
 * unread in that octet; both are advanced.
 */
static uint32_t __inline
getbits(const uint8_t *buf, const uint32_t n, uint32_t *i, uint32_t *l)
{
    uint32_t avail = *l;			/* bits unread in buf[*i] */
    uint32_t mask = (1u << avail) - 1;		/* keeps only those bits */
    uint32_t ret;

    if (avail >= n) {
	/* The whole field lies inside the current octet. */
	*l = avail - n;
	ret = (buf[*i] & mask) >> *l;
	if (*l == 0) {
	    *l = 8;
	    (*i)++;
	}
    } else {
	/*
	 * The field straddles an octet boundary: tail bits of this octet
	 * plus leading bits of the next one.
	 */
	*l = 8 - n + avail;
	ret = (buf[*i] & mask) << 8;
	(*i)++;
	ret = (ret | buf[*i]) >> *l;
    }

    return (ret);
}
+
/*
 * Read the 8 bits starting at bit position (i, l) of the input stream
 * without consuming them.  l == 8 means the read is octet-aligned.
 */
static uint32_t __inline
getbyte(const uint8_t *buf, const uint32_t i, const uint32_t l)
{
    if (l == 8)
	return (buf[i]);
    /* Unaligned: combine the two octets and shift the window out. */
    return ((((buf[i] << 8) | buf[i+1]) >> l) & 0xff);
}
+
/*
 * Strictly ascending-address byte copy.  Unlike memcpy()/memmove(),
 * this is the required behavior when the source and destination
 * overlap with len > offset: bytes already written are re-read as
 * later input, replicating the pattern (see MPPC_Decompress()).
 */
static void __inline
lamecopy(uint8_t *dst, uint8_t *src, uint32_t len)
{
    uint32_t k;

    for (k = 0; k < len; k++)
	dst[k] = src[k];
}
+
/* Bytes the caller must allocate for a decompression history buffer. */
size_t MPPC_SizeOfDecompressionHistory(void)
{
    return (sizeof(struct MPPC_decomp_state));
}
+
/*
 * Reset a decompression history.  Writing starts at the second half of
 * the double-length buffer; the first half holds the previous window
 * after a MPPC_RESTART_HISTORY (see MPPC_Decompress()).
 */
void MPPC_InitDecompressionHistory(char *history)
{
    struct MPPC_decomp_state      *state = (struct MPPC_decomp_state*)history;

    bzero(history, sizeof(struct MPPC_decomp_state));
    state->histptr = MPPE_HIST_LEN;
}
+
/*
 * Decompress *srcCnt bytes from *src into *dst using the sliding-window
 * history in 'history'.  On return *src and *dst are advanced, *srcCnt
 * is zeroed and *dstCnt reduced by the output length.  'flags' may carry
 * MPPC_RESTART_HISTORY to resynchronize with the compressor.  Returns a
 * bit mask: MPPC_OK is cleared on malformed input or history overflow,
 * MPPC_DEST_EXHAUSTED is set when the output buffer is too small.
 */
int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags)
{
    struct MPPC_decomp_state      *state = (struct MPPC_decomp_state*)history;
    uint32_t olen, off, len, bits, val, sig, i, l;
    uint8_t *hist, *s;
    u_char *isrc = *src;
    int	rtn = MPPC_OK;

    if ((flags & MPPC_RESTART_HISTORY) != 0) {
	/* Slide the active window down; mirrors the compressor's restart. */
	memcpy(state->hist, state->hist + MPPE_HIST_LEN, MPPE_HIST_LEN);
	state->histptr = MPPE_HIST_LEN;
    }

    /* Decompressed output accumulates in the history itself. */
    hist = state->hist + state->histptr;
    olen = len = i = 0;
    l = 8;
    bits = *srcCnt * 8;		/* bits of compressed input remaining */
    while (bits >= 8) {
	val = getbyte(isrc, i++, l);
	if (val < 0x80) {		/* literal byte < 0x80 */
	    if (state->histptr < 2*MPPE_HIST_LEN) {
		/* Copy uncompressed byte to the history. */
		(state->hist)[(state->histptr)++] = (uint8_t) val;
	    } else {
		/* Buffer overflow; drop packet. */
		rtn &= ~MPPC_OK;
		return rtn;
	    }
	    olen++;
	    bits -= 8;
	    continue;
	}

	sig = val & 0xc0;
	if (sig == 0x80) {		/* literal byte >= 0x80 */
	    if (state->histptr < 2*MPPE_HIST_LEN) {
		/* Copy uncompressed byte to the history. */
		(state->hist)[(state->histptr)++] = 
		    (uint8_t) (0x80|((val&0x3f)<<1)|getbits(isrc, 1 , &i ,&l));
	    } else {
		/* buffer overflow; drop packet */
		rtn &= ~MPPC_OK;
		return (rtn);
	    }
	    olen++;
	    bits -= 9;
	    continue;
	}

	/* Not a literal byte so it must be an (offset,length) pair */
	/* decode offset */
	sig = val & 0xf0;
	if (sig == 0xf0) {		/* 10-bit offset; 0 <= offset < 64 */
	    off = (((val&0x0f)<<2)|getbits(isrc, 2 , &i ,&l));
	    bits -= 10;
	} else {
	    if (sig == 0xe0) {		/* 12-bit offset; 64 <= offset < 320 */
		off = ((((val&0x0f)<<4)|getbits(isrc, 4 , &i ,&l))+64);
		bits -= 12;
	    } else {
		if ((sig&0xe0) == 0xc0) {/* 16-bit offset; 320 <= offset < 8192 */
		    off = ((((val&0x1f)<<8)|getbyte(isrc, i++, l))+320);
		    bits -= 16;
		    if (off > MPPE_HIST_LEN - 1) {
			/* Offset outside the window; malformed input. */
			rtn &= ~MPPC_OK;
			return (rtn);
		    }
		} else {		/* This shouldn't happen. */
		    rtn &= ~MPPC_OK;
		    return (rtn);
		}
	    }
	}
	/* Decode length of match. */
	val = getbyte(isrc, i, l);
	if ((val & 0x80) == 0x00) {			/* len = 3 */
	    len = 3;
	    bits--;
	    getbits(isrc, 1 , &i ,&l);	/* consume the marker bit */
	} else if ((val & 0xc0) == 0x80) {		/* 4 <= len < 8 */
	    len = 0x04 | ((val>>4) & 0x03);
	    bits -= 4;
	    getbits(isrc, 4 , &i ,&l);
	} else if ((val & 0xe0) == 0xc0) {		/* 8 <= len < 16 */
	    len = 0x08 | ((val>>2) & 0x07);
	    bits -= 6;
	    getbits(isrc, 6 , &i ,&l);
	} else if ((val & 0xf0) == 0xe0) {		/* 16 <= len < 32 */
	    len = 0x10 | (val & 0x0f);
	    bits -= 8;
	    i++;
	} else {
	    /* Longer prefixes: pull in a second (and maybe third) octet. */
	    bits -= 8;
	    val = (val << 8) | getbyte(isrc, ++i, l);
	    if ((val & 0xf800) == 0xf000) {		/* 32 <= len < 64 */
		len = 0x0020 | ((val >> 6) & 0x001f);
		bits -= 2;
		getbits(isrc, 2 , &i ,&l);
	    } else if ((val & 0xfc00) == 0xf800) {	/* 64 <= len < 128 */
		len = 0x0040 | ((val >> 4) & 0x003f);
		bits -= 4;
		getbits(isrc, 4 , &i ,&l);
	    } else if ((val & 0xfe00) == 0xfc00) {	/* 128 <= len < 256 */
		len = 0x0080 | ((val >> 2) & 0x007f);
		bits -= 6;
		getbits(isrc, 6 , &i ,&l);
	    } else if ((val & 0xff00) == 0xfe00) {	/* 256 <= len < 512 */
		len = 0x0100 | (val & 0x00ff);
		bits -= 8;
		i++;
	    } else {
		bits -= 8;
		val = (val << 8) | getbyte(isrc, ++i, l);
		if ((val & 0xff8000) == 0xff0000) {	/* 512 <= len < 1024 */
		    len = 0x000200 | ((val >> 6) & 0x0001ff);
		    bits -= 2;
		    getbits(isrc, 2 , &i ,&l);
		} else if ((val & 0xffc000) == 0xff8000) {/* 1024 <= len < 2048 */
		    len = 0x000400 | ((val >> 4) & 0x0003ff);
		    bits -= 4;
		    getbits(isrc, 4 , &i ,&l);
		} else if ((val & 0xffe000) == 0xffc000) {/* 2048 <= len < 4096 */
		    len = 0x000800 | ((val >> 2) & 0x0007ff);
		    bits -= 6;
		    getbits(isrc, 6 , &i ,&l);
		} else if ((val & 0xfff000) == 0xffe000) {/* 4096 <= len < 8192 */
		    len = 0x001000 | (val & 0x000fff);
		    bits -= 8;
		    i++;
		} else {				/* NOTREACHED */
		    rtn &= ~MPPC_OK;
		    return (rtn);
		}
	    }
	}

	s = state->hist + state->histptr;
	state->histptr += len;
	olen += len;
	if (state->histptr < 2*MPPE_HIST_LEN) {
	    /* Copy uncompressed bytes to the history. */

	    /*
	     * In some cases len may be greater than off. It means that memory
	     * areas pointed by s and s-off overlap. To decode that strange case
	     * data should be copied exactly by address increasing to make
	     * some data repeated.
	     */
	    lamecopy(s, s - off, len);
	} else {
	    /* Buffer overflow; drop packet. */
	    rtn &= ~MPPC_OK;
	    return (rtn);
	}
    }

    /* Do PFC decompression. */
    len = olen;
    if ((hist[0] & 0x01) != 0) {
	/* Low bit set in first byte: re-insert the elided 0x00 protocol octet. */
	(*dst)[0] = 0;
	(*dst)++;
	len++;
    }

    if (len <= *dstCnt) {
	/* Copy uncompressed packet to the output buffer. */
	memcpy(*dst, hist, olen);
    } else {
	/* Buffer overflow; drop packet. */
	rtn |= MPPC_DEST_EXHAUSTED;
    }

    /*
     * NOTE(review): when the PFC octet was inserted above, *dst has
     * already been advanced by one, so "*dst += len" leaves *dst one
     * byte past the written data while *dstCnt is reduced by exactly
     * len.  Callers appear to rely on *dstCnt, not *dst — confirm.
     */
    *src += *srcCnt;
    *srcCnt = 0;
    *dst += len;
    *dstCnt -= len;

    return (rtn);
}


Property changes on: trunk/sys/net/mppcd.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/netisr.c
===================================================================
--- trunk/sys/net/netisr.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netisr.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/netisr.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/netisr.c 282832 2015-05-13 08:04:50Z hiren $");
 
 /*
  * netisr is a packet dispatch service, allowing synchronous (directly
@@ -155,25 +155,15 @@
     "netisr dispatch policy");
 
 /*
- * These sysctls were used in previous versions to control and export
- * dispatch policy state.  Now, we provide read-only export via them so that
- * older netstat binaries work.  At some point they can be garbage collected.
- */
-static int	netisr_direct_force;
-SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD,
-    &netisr_direct_force, 0, "compat: force direct dispatch");
-
-static int	netisr_direct;
-SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0,
-    "compat: enable direct dispatch");
-
-/*
  * Allow the administrator to limit the number of threads (CPUs) to use for
  * netisr.  We don't check netisr_maxthreads before creating the thread for
- * CPU 0, so in practice we ignore values <= 1.  This must be set at boot.
- * We will create at most one thread per CPU.
+ * CPU 0. This must be set at boot. We will create at most one thread per CPU.
+ * By default we initialize this to 1 which would assign just 1 cpu (cpu0) and
+ * therefore only 1 workstream. If set to -1, netisr would use all cpus
+ * (mp_ncpus) and therefore would have those many workstreams. One workstream
+ * per thread (CPU).
  */
-static int	netisr_maxthreads = -1;		/* Max number of threads. */
+static int	netisr_maxthreads = 1;		/* Max number of threads. */
 TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
 SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
     &netisr_maxthreads, 0,
@@ -339,32 +329,6 @@
 	return (EINVAL);
 }
 
-static void
-netisr_dispatch_policy_compat(void)
-{
-
-	switch (netisr_dispatch_policy) {
-	case NETISR_DISPATCH_DEFERRED:
-		netisr_direct_force = 0;
-		netisr_direct = 0;
-		break;
-
-	case NETISR_DISPATCH_HYBRID:
-		netisr_direct_force = 0;
-		netisr_direct = 1;
-		break;
-
-	case NETISR_DISPATCH_DIRECT:
-		netisr_direct_force = 1;
-		netisr_direct = 1;
-		break;
-
-	default:
-		panic("%s: unknown policy %u", __func__,
-		    netisr_dispatch_policy);
-	}
-}
-
 static int
 sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
 {
@@ -380,10 +344,8 @@
 		    &dispatch_policy);
 		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
 			error = EINVAL;
-		if (error == 0) {
+		if (error == 0)
 			netisr_dispatch_policy = dispatch_policy;
-			netisr_dispatch_policy_compat();
-		}
 	}
 	return (error);
 }
@@ -728,12 +690,13 @@
 	}
 
 	if (policy == NETISR_POLICY_FLOW) {
-		if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
+		if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
+		    npp->np_m2flow != NULL) {
 			m = npp->np_m2flow(m, source);
 			if (m == NULL)
 				return (NULL);
 		}
-		if (m->m_flags & M_FLOWID) {
+		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
 			*cpuidp =
 			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
 			return (m);
@@ -1169,8 +1132,10 @@
 	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
 
 	NETISR_LOCK_INIT();
-	if (netisr_maxthreads < 1)
-		netisr_maxthreads = 1;
+	if (netisr_maxthreads == 0 || netisr_maxthreads < -1 )
+		netisr_maxthreads = 1;		/* default behavior */
+	else if (netisr_maxthreads == -1)
+		netisr_maxthreads = mp_ncpus;	/* use max cpus */
 	if (netisr_maxthreads > mp_ncpus) {
 		printf("netisr_init: forcing maxthreads from %d to %d\n",
 		    netisr_maxthreads, mp_ncpus);
@@ -1200,10 +1165,9 @@
 		    &dispatch_policy);
 		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
 			error = EINVAL;
-		if (error == 0) {
+		if (error == 0)
 			netisr_dispatch_policy = dispatch_policy;
-			netisr_dispatch_policy_compat();
-		} else
+		else
 			printf(
 			    "%s: invalid dispatch policy %s, using default\n",
 			    __func__, tmp);

Modified: trunk/sys/net/netisr.h
===================================================================
--- trunk/sys/net/netisr.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netisr.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/netisr.h 222249 2011-05-24 12:34:19Z rwatson $
+ * $FreeBSD: stable/10/sys/net/netisr.h 222249 2011-05-24 12:34:19Z rwatson $
  */
 
 #ifndef _NET_NETISR_H_

Modified: trunk/sys/net/netisr_internal.h
===================================================================
--- trunk/sys/net/netisr_internal.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netisr_internal.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/netisr_internal.h 222249 2011-05-24 12:34:19Z rwatson $
+ * $FreeBSD: stable/10/sys/net/netisr_internal.h 222249 2011-05-24 12:34:19Z rwatson $
  */
 
 #ifndef _NET_NETISR_INTERNAL_H_

Modified: trunk/sys/net/netmap.h
===================================================================
--- trunk/sys/net/netmap.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netmap.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,244 +1,280 @@
 /* $MidnightBSD$ */
 /*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- * 
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
  *   1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
- * 
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
- *      documentation and/or other materials provided with the
- *      distribution.
- * 
- *   3. Neither the name of the authors nor the names of their contributors
- *      may be used to endorse or promote products derived from this
- *      software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 /*
- * $FreeBSD: stable/9/sys/net/netmap.h 247230 2013-02-24 18:26:17Z luigi $
- * $Id: netmap.h 11997 2013-01-17 21:59:12Z luigi $
+ * $FreeBSD: stable/10/sys/net/netmap.h 270292 2014-08-21 19:42:03Z np $
  *
  * Definitions of constants and the structures used by the netmap
  * framework, for the part visible to both kernel and userspace.
  * Detailed info on netmap is available with "man netmap" or at
- * 
+ *
  *	http://info.iet.unipi.it/~luigi/netmap/
+ *
+ * This API is also used to communicate with the VALE software switch
  */
 
 #ifndef _NET_NETMAP_H_
 #define _NET_NETMAP_H_
 
+#define	NETMAP_API	11		/* current API version */
+
+#define	NETMAP_MIN_API	11		/* min and max versions accepted */
+#define	NETMAP_MAX_API	15
 /*
+ * Some fields should be cache-aligned to reduce contention.
+ * The alignment is architecture and OS dependent, but rather than
+ * digging into OS headers to find the exact value we use an estimate
+ * that should cover most architectures.
+ */
+#define NM_CACHE_ALIGN	128
+
+/*
  * --- Netmap data structures ---
  *
- * The data structures used by netmap are shown below. Those in
- * capital letters are in an mmapp()ed area shared with userspace,
- * while others are private to the kernel.
- * Shared structures do not contain pointers but only memory
- * offsets, so that addressing is portable between kernel and userspace.
+ * The userspace data structures used by netmap are shown below.
+ * They are allocated by the kernel and mmap()ed by userspace threads.
+ * Pointers are implemented as memory offsets or indexes,
+ * so that they can be easily dereferenced in kernel and userspace.
 
+   KERNEL (opaque, obviously)
 
- softc
-+----------------+
-| standard fields|
-| if_pspare[0] ----------+
-+----------------+       |
-                         |
-+----------------+<------+
-|(netmap_adapter)|
-|                |                             netmap_kring
-| tx_rings *--------------------------------->+---------------+
-|                |       netmap_kring         | ring    *---------.
-| rx_rings *--------->+---------------+       | nr_hwcur      |   |
-+----------------+    | ring    *--------.    | nr_hwavail    |   V
-                      | nr_hwcur      |  |    | selinfo       |   |
-                      | nr_hwavail    |  |    +---------------+   .
-                      | selinfo       |  |    |     ...       |   .
-                      +---------------+  |    |(ntx+1 entries)|
-                      |    ....       |  |    |               |
-                      |(nrx+1 entries)|  |    +---------------+
-                      |               |  |
-   KERNEL             +---------------+  |
-                                         |
   ====================================================================
                                          |
-   USERSPACE                             |      NETMAP_RING
-                                         +---->+-------------+
-                                             / | cur         |
-   NETMAP_IF  (nifp, one per file desc.)    /  | avail       |
-    +---------------+                      /   | buf_ofs     |
-    | ni_tx_rings   |                     /    +=============+
-    | ni_rx_rings   |                    /     | buf_idx     | slot[0]
-    |               |                   /      | len, flags  |
-    |               |                  /       +-------------+
-    +===============+                 /        | buf_idx     | slot[1]
-    | txring_ofs[0] | (rel.to nifp)--'         | len, flags  |
-    | txring_ofs[1] |                          +-------------+
-  (num_rings+1 entries)                     (nr_num_slots entries)
-    | txring_ofs[n] |                          | buf_idx     | slot[n-1]
-    +---------------+                          | len, flags  |
-    | rxring_ofs[0] |                          +-------------+
+   USERSPACE                             |      struct netmap_ring
+                                         +---->+---------------+
+                                             / | head,cur,tail |
+   struct netmap_if (nifp, 1 per fd)        /  | buf_ofs       |
+    +---------------+                      /   | other fields  |
+    | ni_tx_rings   |                     /    +===============+
+    | ni_rx_rings   |                    /     | buf_idx, len  | slot[0]
+    |               |                   /      | flags, ptr    |
+    |               |                  /       +---------------+
+    +===============+                 /        | buf_idx, len  | slot[1]
+    | txring_ofs[0] | (rel.to nifp)--'         | flags, ptr    |
+    | txring_ofs[1] |                          +---------------+
+     (tx+1 entries)                           (num_slots entries)
+    | txring_ofs[t] |                          | buf_idx, len  | slot[n-1]
+    +---------------+                          | flags, ptr    |
+    | rxring_ofs[0] |                          +---------------+
     | rxring_ofs[1] |
-  (num_rings+1 entries)
-    | txring_ofs[n] |
+     (rx+1 entries)
+    | rxring_ofs[r] |
     +---------------+
 
- * The private descriptor ('softc' or 'adapter') of each interface
- * is extended with a "struct netmap_adapter" containing netmap-related
- * info (see description in dev/netmap/netmap_kernel.h.
- * Among other things, tx_rings and rx_rings point to the arrays of
- * "struct netmap_kring" which in turn reache the various
- * "struct netmap_ring", shared with userspace.
-
- * The NETMAP_RING is the userspace-visible replica of the NIC ring.
- * Each slot has the index of a buffer, its length and some flags.
+ * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
+ * a file descriptor, the mmap()ed region contains a (logically readonly)
+ * struct netmap_if pointing to struct netmap_ring's.
+ *
+ * There is one netmap_ring per physical NIC ring, plus one tx/rx ring
+ * pair attached to the host stack (this pair is unused for non-NIC ports).
+ *
+ * All physical/host stack ports share the same memory region,
+ * so that zero-copy can be implemented between them.
+ * VALE switch ports instead have separate memory regions.
+ *
+ * The netmap_ring is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer (MTU-sized and residing in the
+ * mmapped region), its length and some flags. An extra 64-bit pointer
+ * is provided for user-supplied buffers in the tx path.
+ *
  * In user space, the buffer address is computed as
- *	(char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
- * In the kernel, buffers do not necessarily need to be contiguous,
- * and the virtual and physical addresses are derived through
- * a lookup table.
+ *	(char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
  *
- * struct netmap_slot:
+ * Added in NETMAP_API 11:
  *
- * buf_idx	is the index of the buffer associated to the slot.
- * len		is the length of the payload
- * NS_BUF_CHANGED	must be set whenever userspace wants
- *		to change buf_idx (it might be necessary to
- *		reprogram the NIC slot)
- * NS_REPORT	must be set if we want the NIC to generate an interrupt
- *		when this slot is used. Leaving it to 0 improves
- *		performance.
- * NS_FORWARD	if set on a receive ring, and the device is in
- *		transparent mode, buffers released with the flag set
- *		will be forwarded to the 'other' side (host stack
- *		or NIC, respectively) on the next select() or ioctl()
- * NS_NO_LEARN	on a VALE switch, do not 'learn' the source port for
- *		this packet.
- * NS_PORT_MASK	the high 8 bits of the flag, if not zero, indicate the
- *		destination port for the VALE switch, overriding
- *		the lookup table.
+ * + NIOCREGIF can request the allocation of extra spare buffers from
+ *   the same memory pool. The desired number of buffers must be in
+ *   nr_arg3. The ioctl may return fewer buffers, depending on memory
+ *   availability. nr_arg3 will return the actual value, and, once
+ *   mapped, nifp->ni_bufs_head will be the index of the first buffer.
+ *
+ *   The buffers are linked to each other using the first uint32_t
+ *   as the index. On close, ni_bufs_head must point to the list of
+ *   buffers to be released.
+ *
+ * + NIOCREGIF can request space for extra rings (and buffers)
+ *   allocated in the same memory space. The number of extra rings
+ *   is in nr_arg1, and is advisory. This is a no-op on NICs where
+ *   the size of the memory space is fixed.
+ *
+ * + NIOCREGIF can attach to PIPE rings sharing the same memory
+ *   space with a parent device. The ifname indicates the parent device,
+ *   which must already exist. Flags in nr_flags indicate if we want to
+ *   bind the master or slave side, the index (from nr_ringid)
+ *   is just a cookie and does not need to be sequential.
+ *
+ * + NIOCREGIF can also attach to 'monitor' rings that replicate
+ *   the content of specific rings, also from the same memory space.
+ *
+ *   Extra flags in nr_flags support the above functions.
+ *   Application libraries may use the following naming scheme:
+ *	netmap:foo			all NIC ring pairs
+ *	netmap:foo^			only host ring pair
+ *	netmap:foo+			all NIC ring + host ring pairs
+ *	netmap:foo-k			the k-th NIC ring pair
+ *	netmap:foo{k			PIPE ring pair k, master side
+ *	netmap:foo}k			PIPE ring pair k, slave side
  */
 
+/*
+ * struct netmap_slot is a buffer descriptor
+ */
 struct netmap_slot {
-	uint32_t buf_idx; /* buffer index */
-	uint16_t len;	/* packet length, to be copied to/from the hw ring */
-	uint16_t flags;	/* buf changed, etc. */
-#define	NS_BUF_CHANGED	0x0001	/* must resync the map, buffer changed */
-#define	NS_REPORT	0x0002	/* ask the hardware to report results
-				 * e.g. by generating an interrupt
-				 */
-#define	NS_FORWARD	0x0004	/* pass packet to the other endpoint
-				 * (host stack or device)
-				 */
-#define	NS_NO_LEARN	0x0008
+	uint32_t buf_idx;	/* buffer index */
+	uint16_t len;		/* length for this slot */
+	uint16_t flags;		/* buf changed, etc. */
+	uint64_t ptr;		/* pointer for indirect buffers */
+};
+
+/*
+ * The following flags control how the slot is used
+ */
+
+#define	NS_BUF_CHANGED	0x0001	/* buf_idx changed */
+	/*
+	 * must be set whenever buf_idx is changed (as it might be
+	 * necessary to recompute the physical address and mapping)
+	 */
+
+#define	NS_REPORT	0x0002	/* ask the hardware to report results */
+	/*
+	 * Request notification when slot is used by the hardware.
+	 * Normally transmit completions are handled lazily and
+	 * may be unreported. This flag lets us know when a slot
+	 * has been sent (e.g. to terminate the sender).
+	 */
+
+#define	NS_FORWARD	0x0004	/* pass packet 'forward' */
+	/*
+	 * (Only for physical ports, rx rings with NR_FORWARD set).
+	 * Slot released to the kernel (i.e. before ring->head) with
+	 * this flag set are passed to the peer ring (host/NIC),
+	 * thus restoring the host-NIC connection for these slots.
+	 * This supports efficient traffic monitoring or firewalling.
+	 */
+
+#define	NS_NO_LEARN	0x0008	/* disable bridge learning */
+ 	/*
+	 * On a VALE switch, do not 'learn' the source port for
+ 	 * this buffer.
+	 */
+
+#define	NS_INDIRECT	0x0010	/* userspace buffer */
+ 	/*
+	 * (VALE tx rings only) data is in a userspace buffer,
+	 * whose address is in the 'ptr' field in the slot.
+	 */
+
+#define	NS_MOREFRAG	0x0020	/* packet has more fragments */
+ 	/*
+	 * (VALE ports only)
+	 * Set on all but the last slot of a multi-segment packet.
+	 * The 'len' field refers to the individual fragment.
+	 */
+
 #define	NS_PORT_SHIFT	8
 #define	NS_PORT_MASK	(0xff << NS_PORT_SHIFT)
-};
+	/*
+ 	 * The high 8 bits of the flag, if not zero, indicate the
+	 * destination port for the VALE switch, overriding
+ 	 * the lookup table.
+ 	 */
 
+#define	NS_RFRAGS(_slot)	( ((_slot)->flags >> 8) & 0xff)
+	/*
+	 * (VALE rx rings only) the high 8 bits
+	 *  are the number of fragments.
+	 */
+
+
 /*
+ * struct netmap_ring
+ *
  * Netmap representation of a TX or RX ring (also known as "queue").
  * This is a queue implemented as a fixed-size circular array.
- * At the software level, two fields are important: avail and cur.
+ * At the software level the important fields are: head, cur, tail.
  *
  * In TX rings:
- *	avail	indicates the number of slots available for transmission.
- *		It is updated by the kernel after every netmap system call.
- *		It MUST BE decremented by the application when it appends a
- *		packet.
- *	cur	indicates the slot to use for the next packet
- *		to send (i.e. the "tail" of the queue).
- *		It MUST BE incremented by the application before
- *		netmap system calls to reflect the number of newly
- *		sent packets.
- *		It is checked by the kernel on netmap system calls
- *		(normally unmodified by the kernel unless invalid).
  *
- *   The kernel side of netmap uses two additional fields in its own
- *   private ring structure, netmap_kring:
- *	nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
- *	nr_hwavail is the number of slots known as available by the
- *		hardware. It is updated on an INTR (inc by the
- *		number of packets sent) and on a NIOCTXSYNC
- *		(decrease by nr_cur - nr_hwcur)
- *		A special case, nr_hwavail is -1 if the transmit
- *		side is idle (no pending transmits).
+ *	head	first slot available for transmission.
+ *	cur	wakeup point. select() and poll() will unblock
+ *		when 'tail' moves past 'cur'
+ *	tail	(readonly) first slot reserved to the kernel
  *
+ *	[head .. tail-1] can be used for new packets to send;
+ *	'head' and 'cur' must be incremented as slots are filled
+ *	    with new packets to be sent;
+ *	'cur' can be moved further ahead if we need more space
+ *	for new transmissions. XXX todo (2014-03-12)
+ *
  * In RX rings:
- *	avail	is the number of packets available (possibly 0).
- *		It MUST BE decremented by the application when it consumes
- *		a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
- *	cur	indicates the first slot that contains a packet not
- *		processed yet (the "head" of the queue).
- *		It MUST BE incremented by the software when it consumes
- *		a packet.
- *	reserved	indicates the number of buffers before 'cur'
- *		that the application has still in use. Normally 0,
- *		it MUST BE incremented by the application when it
- *		does not return the buffer immediately, and decremented
- *		when the buffer is finally freed.
  *
- *   The kernel side of netmap uses two additional fields in the kring:
- *	nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
- *	nr_hwavail is the number of packets available. It is updated
- *		on INTR (inc by the number of new packets arrived)
- *		and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
+ *	head	first valid received packet
+ *	cur	wakeup point. select() and poll() will unblock
+ *		when 'tail' moves past 'cur'
+ *	tail	(readonly) first slot reserved to the kernel
  *
+ *	[head .. tail-1] contain received packets;
+ *	'head' and 'cur' must be incremented as slots are consumed
+ *		and can be returned to the kernel;
+ *	'cur' can be moved further ahead if we want to wait for
+ *		new packets without returning the previous ones.
+ *
  * DATA OWNERSHIP/LOCKING:
- *	The netmap_ring is owned by the user program and it is only
- *	accessed or modified in the upper half of the kernel during
- *	a system call.
+ *	The netmap_ring, and all slots and buffers in the range
+ *	[head .. tail-1] are owned by the user program;
+ *	the kernel only accesses them during a netmap system call
+ *	and in the user thread context.
  *
- *	The netmap_kring is only modified by the upper half of the kernel.
- *
- * FLAGS
- *	NR_TIMESTAMP	updates the 'ts' field on each syscall. This is
- *			a global timestamp for all packets.
- *	NR_RX_TSTMP	if set, the last 64 byte in each buffer will
- *			contain a timestamp for the frame supplied by
- *			the hardware (if supported)
- *	NR_FORWARD	if set, the NS_FORWARD flag in each slot of the
- *			RX ring is checked, and if set the packet is
- *			passed to the other side (host stack or device,
- *			respectively). This permits bpf-like behaviour
- *			or transparency for selected packets.
+ *	Other slots and buffers are reserved for use by the kernel
  */
 struct netmap_ring {
 	/*
-	 * nr_buf_base_ofs is meant to be used through macros.
+	 * buf_ofs is meant to be used through macros.
 	 * It contains the offset of the buffer region from this
 	 * descriptor.
 	 */
-	const ssize_t	buf_ofs;
+	const int64_t	buf_ofs;
 	const uint32_t	num_slots;	/* number of slots in the ring. */
-	uint32_t	avail;		/* number of usable slots */
-	uint32_t        cur;		/* 'current' r/w position */
-	uint32_t	reserved;	/* not refilled before current */
+	const uint32_t	nr_buf_size;
+	const uint16_t	ringid;
+	const uint16_t	dir;		/* 0: tx, 1: rx */
 
-	const uint16_t	nr_buf_size;
-	uint16_t	flags;
-#define	NR_TIMESTAMP	0x0002		/* set timestamp on *sync() */
-#define	NR_FORWARD	0x0004		/* enable NS_FORWARD for ring */
-#define	NR_RX_TSTMP	0x0008		/* set rx timestamp in slots */
+	uint32_t        head;		/* (u) first user slot */
+	uint32_t        cur;		/* (u) wakeup point */
+	uint32_t	tail;		/* (k) first kernel slot */
 
-	struct timeval	ts;		/* time of last *sync() */
+	uint32_t	flags;
 
+	struct timeval	ts;		/* (k) time of last *sync() */
+
+	/* opaque room for a mutex or similar object */
+	uint8_t		sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN)));
+
 	/* the slots follow. This struct has variable size */
 	struct netmap_slot slot[0];	/* array of slots. */
 };
@@ -245,53 +281,191 @@
 
 
 /*
+ * RING FLAGS
+ */
+#define	NR_TIMESTAMP	0x0002		/* set timestamp on *sync() */
+	/*
+	 * updates the 'ts' field on each netmap syscall. This saves
+	 * a separate gettimeofday(), and is not much worse than
+	 * software timestamps generated in the interrupt handler.
+	 */
+
+#define	NR_FORWARD	0x0004		/* enable NS_FORWARD for ring */
+ 	/*
+	 * Enables the NS_FORWARD slot flag for the ring.
+	 */
+
+
+/*
  * Netmap representation of an interface and its queue(s).
+ * This is initialized by the kernel when binding a file
+ * descriptor to a port, and should be considered as readonly
+ * by user programs. The kernel never uses it.
+ *
  * There is one netmap_if for each file descriptor on which we want
- * to select/poll.  We assume that on each interface has the same number
- * of receive and transmit queues.
+ * to select/poll.
  * select/poll operates on one or all pairs depending on the value of
  * nmr_queueid passed on the ioctl.
  */
 struct netmap_if {
 	char		ni_name[IFNAMSIZ]; /* name of the interface. */
-	const u_int	ni_version;	/* API version, currently unused */
-	const u_int	ni_rx_rings;	/* number of rx rings */
-	const u_int	ni_tx_rings;	/* if zero, same as ni_rx_rings */
+	const uint32_t	ni_version;	/* API version, currently unused */
+	const uint32_t	ni_flags;	/* properties */
+#define	NI_PRIV_MEM	0x1		/* private memory region */
+
 	/*
+	 * The number of packet rings available in netmap mode.
+	 * Physical NICs can have different numbers of tx and rx rings.
+	 * Physical NICs also have a 'host' ring pair.
+	 * Additionally, clients can request additional ring pairs to
+	 * be used for internal communication.
+	 */
+	const uint32_t	ni_tx_rings;	/* number of HW tx rings */
+	const uint32_t	ni_rx_rings;	/* number of HW rx rings */
+
+	uint32_t	ni_bufs_head;	/* head index for extra bufs */
+	uint32_t	ni_spare1[5];
+	/*
 	 * The following array contains the offset of each netmap ring
-	 * from this structure. The first ni_tx_queues+1 entries refer
-	 * to the tx rings, the next ni_rx_queues+1 refer to the rx rings
-	 * (the last entry in each block refers to the host stack rings).
-	 * The area is filled up by the kernel on NIOCREG,
+	 * from this structure, in the following order:
+	 * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
+	 * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
+	 *
+	 * The area is filled up by the kernel on NIOCREGIF,
 	 * and then only read by userspace code.
 	 */
 	const ssize_t	ring_ofs[0];
 };
 
-#ifndef NIOCREGIF	
+
+#ifndef NIOCREGIF
 /*
  * ioctl names and related fields
  *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ *	whose identity is set in NIOCREGIF through nr_ringid.
+ *	These are non blocking and take no argument.
+ *
  * NIOCGINFO takes a struct ifreq, the interface name is the input,
  *	the outputs are number of queues and number of descriptor
  *	for each queue (useful to set number of threads etc.).
+ *	The info returned is only advisory and may change before
+ *	the interface is bound to a file descriptor.
  *
- * NIOCREGIF takes an interface name within a struct ifreq,
+ * NIOCREGIF takes an interface name within a struct nmreq,
  *	and activates netmap mode on the interface (if possible).
  *
- * NIOCUNREGIF unregisters the interface associated to the fd.
+ * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
+ * can pass it down to other NIC-related ioctls.
  *
- * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
- *	whose identity is set in NIOCREGIF through nr_ringid
+ * The actual argument (struct nmreq) has a number of options to request
+ * different functions.
+ * The following are used in NIOCREGIF when nr_cmd == 0:
+ *
+ * nr_name	(in)
+ *	The name of the port (em0, valeXXX:YYY, etc.)
+ *	limited to IFNAMSIZ for backward compatibility.
+ *
+ * nr_version	(in/out)
+ *	Must match NETMAP_API as used in the kernel, error otherwise.
+ *	Always returns the desired value on output.
+ *
+ * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
+ *	On input, non-zero values may be used to reconfigure the port
+ *	according to the requested values, but this is not guaranteed.
+ *	On output the actual values in use are reported.
+ *
+ * nr_ringid (in)
+ *	Indicates how rings should be bound to the file descriptors.
+ *	If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
+ *	are used to indicate the ring number, and nr_flags specifies
+ *	the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ *
+ *	NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
+ *	If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
+ *	the binding as follows:
+ *	0 (default)			binds all physical rings
+ *	NETMAP_HW_RING | ring number	binds a single ring pair
+ *	NETMAP_SW_RING			binds only the host tx/rx rings
+ *
+ *	NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push
+ *		packets on tx rings only if POLLOUT is set.
+ *		The default is to push any pending packet.
+ *
+ *	NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
+ *		packets on rx rings also when POLLIN is NOT set.
+ *		The default is to touch the rx ring only with POLLIN.
+ *		Note that this is the opposite of TX because it
+ *		reflects the common usage.
+ *
+ *	NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
+ *	NETMAP_PRIV_MEM is set on return for ports that do not use
+ *		the global memory allocator.
+ *		This information is not significant and applications
+ *		should look at the region id in nr_arg2
+ *
+ * nr_flags	is the recommended mode to indicate which rings should
+ *		be bound to a file descriptor. Values are NR_REG_*
+ *
+ * nr_arg1 (in)	The number of extra rings to be reserved.
+ *		Especially when allocating a VALE port the system only
+ *		allocates the amount of memory needed for the port.
+ *		If more shared memory rings are desired (e.g. for pipes),
+ *		the first invocation for the same basename/allocator
+ *		should specify a suitable number. Memory cannot be
+ *		extended after the first allocation without closing
+ *		all ports on the same region.
+ *
+ * nr_arg2 (in/out) The identity of the memory region used.
+ *		On input, 0 means the system decides autonomously,
+ *		other values may try to select a specific region.
+ *		On return the actual value is reported.
+ *		Region '1' is the global allocator, normally shared
+ *		by all interfaces. Other values are private regions.
+ *		If two ports use the same region, zero-copy is possible.
+ *
+ * nr_arg3 (in/out)	number of extra buffers to be allocated.
+ *
+ *
+ *
+ * nr_cmd (in)	if non-zero indicates a special command:
+ *	NETMAP_BDG_ATTACH	 and nr_name = vale*:ifname
+ *		attaches the NIC to the switch; nr_ringid specifies
+ *		which rings to use. Used by vale-ctl -a ...
+ *	    nr_arg1 = NETMAP_BDG_HOST also attaches the host port
+ *		as in vale-ctl -h ...
+ *
+ *	NETMAP_BDG_DETACH	and nr_name = vale*:ifname
+ *		disconnects a previously attached NIC.
+ *		Used by vale-ctl -d ...
+ *
+ *	NETMAP_BDG_LIST
+ *		list the configuration of VALE switches.
+ *
+ *	NETMAP_BDG_VNET_HDR
+ *		Set the virtio-net header length used by the client
+ *		of a VALE switch port.
+ *
+ *	NETMAP_BDG_NEWIF
+ *		create a persistent VALE port with name nr_name.
+ *		Used by vale-ctl -n ...
+ *
+ *	NETMAP_BDG_DELIF
+ *		delete a persistent VALE port. Used by vale-ctl -d ...
+ *
+ * nr_arg1, nr_arg2, nr_arg3  (in/out)		command specific
+ *
+ *
+ *
  */
 
+
 /*
- * struct nmreq overlays a struct ifreq
+ * struct nmreq overlays a struct ifreq (just the name)
  */
 struct nmreq {
 	char		nr_name[IFNAMSIZ];
 	uint32_t	nr_version;	/* API version */
-#define	NETMAP_API	3		/* current version */
 	uint32_t	nr_offset;	/* nifp offset in the shared region */
 	uint32_t	nr_memsize;	/* size of the shared region */
 	uint32_t	nr_tx_slots;	/* slots in tx rings */
@@ -298,15 +472,50 @@
 	uint32_t	nr_rx_slots;	/* slots in rx rings */
 	uint16_t	nr_tx_rings;	/* number of tx rings */
 	uint16_t	nr_rx_rings;	/* number of rx rings */
+
 	uint16_t	nr_ringid;	/* ring(s) we care about */
-#define NETMAP_HW_RING	0x4000		/* low bits indicate one hw ring */
-#define NETMAP_SW_RING	0x2000		/* process the sw ring */
+#define NETMAP_HW_RING		0x4000	/* single NIC ring pair */
+#define NETMAP_SW_RING		0x2000	/* only host ring pair */
+
+#define NETMAP_RING_MASK	0x0fff	/* the ring number */
+
 #define NETMAP_NO_TX_POLL	0x1000	/* no automatic txsync on poll */
-#define NETMAP_RING_MASK 0xfff		/* the ring number */
-	uint16_t	spare1;
-	uint32_t	spare2[4];
+
+#define NETMAP_DO_RX_POLL	0x8000	/* DO automatic rxsync on poll */
+
+	uint16_t	nr_cmd;
+#define NETMAP_BDG_ATTACH	1	/* attach the NIC */
+#define NETMAP_BDG_DETACH	2	/* detach the NIC */
+#define NETMAP_BDG_REGOPS	3	/* register bridge callbacks */
+#define NETMAP_BDG_LIST		4	/* get bridge's info */
+#define NETMAP_BDG_VNET_HDR     5       /* set the port virtio-net-hdr length */
+#define NETMAP_BDG_OFFSET	NETMAP_BDG_VNET_HDR	/* deprecated alias */
+#define NETMAP_BDG_NEWIF	6	/* create a virtual port */
+#define NETMAP_BDG_DELIF	7	/* destroy a virtual port */
+	uint16_t	nr_arg1;	/* reserve extra rings in NIOCREGIF */
+#define NETMAP_BDG_HOST		1	/* attach the host stack on ATTACH */
+
+	uint16_t	nr_arg2;
+	uint32_t	nr_arg3;	/* req. extra buffers in NIOCREGIF */
+	uint32_t	nr_flags;
+	/* various modes, extends nr_ringid */
+	uint32_t	spare2[1];
 };
 
+#define NR_REG_MASK		0xf /* values for nr_flags */
+enum {	NR_REG_DEFAULT	= 0,	/* backward compat, should not be used. */
+	NR_REG_ALL_NIC	= 1,
+	NR_REG_SW	= 2,
+	NR_REG_NIC_SW	= 3,
+	NR_REG_ONE_NIC	= 4,
+	NR_REG_PIPE_MASTER = 5,
+	NR_REG_PIPE_SLAVE = 6,
+};
+/* monitor uses the NR_REG to select the rings to monitor */
+#define NR_MONITOR_TX	0x100
+#define NR_MONITOR_RX	0x200
+
+
 /*
  * FreeBSD uses the size value embedded in the _IOWR to determine
  * how much to copy in/out. So we need it to match the actual
@@ -315,9 +524,34 @@
  */
 #define NIOCGINFO	_IOWR('i', 145, struct nmreq) /* return IF info */
 #define NIOCREGIF	_IOWR('i', 146, struct nmreq) /* interface register */
-#define NIOCUNREGIF	_IO('i', 147) /* interface unregister */
 #define NIOCTXSYNC	_IO('i', 148) /* sync tx queues */
 #define NIOCRXSYNC	_IO('i', 149) /* sync rx queues */
+#define NIOCCONFIG	_IOWR('i',150, struct nm_ifreq) /* for ext. modules */
 #endif /* !NIOCREGIF */
 
+
+/*
+ * Helper functions for kernel and userspace
+ */
+
+/*
+ * check if space is available in the ring.
+ */
+static inline int
+nm_ring_empty(struct netmap_ring *ring)
+{
+	return (ring->cur == ring->tail);
+}
+
+/*
+ * Opaque structure that is passed to an external kernel
+ * module via ioctl(fd, NIOCCONFIG, req) for a user-owned
+ * bridge port (at this point ephemeral VALE interface).
+ */
+#define NM_IFRDATA_LEN 256
+struct nm_ifreq {
+	char nifr_name[IFNAMSIZ];
+	char data[NM_IFRDATA_LEN];
+};
+
 #endif /* _NET_NETMAP_H_ */

Modified: trunk/sys/net/netmap_user.h
===================================================================
--- trunk/sys/net/netmap_user.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netmap_user.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,42 +1,35 @@
 /* $MidnightBSD$ */
 /*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- * 
+ * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
+ *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
  *   1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
- * 
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
- *      documentation and/or other materials provided with the
- *      distribution.
- * 
- *   3. Neither the name of the authors nor the names of their contributors
- *      may be used to endorse or promote products derived from this
- *      software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 /*
- * $FreeBSD: stable/9/sys/net/netmap_user.h 246355 2013-02-05 09:40:31Z luigi $
- * $Id: netmap_user.h 10597 2012-02-21 05:08:32Z luigi $
+ * $FreeBSD: stable/10/sys/net/netmap_user.h 278775 2015-02-14 19:18:56Z luigi $
  *
- * This header contains the macros used to manipulate netmap structures
- * and packets in userspace. See netmap(4) for more information.
+ * Functions and macros to manipulate netmap structures and packets
+ * in userspace. See netmap(4) for more information.
  *
  * The address of the struct netmap_if, say nifp, is computed from the
  * value returned from ioctl(.., NIOCREG, ...) and the mmap region:
@@ -48,25 +41,47 @@
  * From there:
  *	struct netmap_ring *NETMAP_TXRING(nifp, index)
  *	struct netmap_ring *NETMAP_RXRING(nifp, index)
- *		we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
+ *		we can access ring->cur, ring->head, ring->tail, etc.
  *
  *	ring->slot[i] gives us the i-th slot (we can access
- *		directly plen, flags, bufindex)
+ *		directly len, flags, buf_idx)
  *
- *	char *buf = NETMAP_BUF(ring, index) returns a pointer to
- *		the i-th buffer
+ *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
+ *		the buffer numbered x
  *
- * Since rings are circular, we have macros to compute the next index
- *	i = NETMAP_RING_NEXT(ring, i);
+ * All ring indexes (head, cur, tail) should always move forward.
+ * To compute the next index in a circular ring you can use
+ *	i = nm_ring_next(ring, i);
+ *
+ * To ease porting apps from pcap to netmap we supply a few functions
+ * that can be called to open, close, read and write on netmap in a way
+ * similar to libpcap. Note that the read/write functions depend on
+ * an ioctl()/select()/poll() being issued to refill rings or push
+ * packets out.
+ *
+ * In order to use these, include #define NETMAP_WITH_LIBS
+ * in the source file that invokes these functions.
  */
 
 #ifndef _NET_NETMAP_USER_H_
 #define _NET_NETMAP_USER_H_
 
+#include <stdint.h>
+#include <sys/socket.h>		/* apple needs sockaddr */
+#include <net/if.h>		/* IFNAMSIZ */
+
+#ifndef likely
+#define likely(x)	__builtin_expect(!!(x), 1)
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#endif /* likely and unlikely */
+
+#include <net/netmap.h>
+
+/* helper macro */
 #define _NETMAP_OFFSET(type, ptr, offset) \
 	((type)(void *)((char *)(ptr) + (offset)))
 
-#define NETMAP_IF(b, o)	_NETMAP_OFFSET(struct netmap_if *, b, o)
+#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)
 
 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
 	nifp, (nifp)->ring_ofs[index] )
@@ -79,19 +94,589 @@
 
 #define NETMAP_BUF_IDX(ring, buf)			\
 	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
-		(ring)->nr_buf_size ) 
+		(ring)->nr_buf_size )
 
-#define	NETMAP_RING_NEXT(r, i)				\
-	((i)+1 == (r)->num_slots ? 0 : (i) + 1 )
 
-#define	NETMAP_RING_FIRST_RESERVED(r)			\
-	( (r)->cur < (r)->reserved ?			\
-	  (r)->cur + (r)->num_slots - (r)->reserved :	\
-	  (r)->cur - (r)->reserved )
+static inline uint32_t
+nm_ring_next(struct netmap_ring *r, uint32_t i)
+{
+	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
+}
 
+
 /*
- * Return 1 if the given tx ring is empty.
+ * Return 1 if we have pending transmissions in the tx ring.
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
  */
-#define NETMAP_TX_RING_EMPTY(r)	((r)->avail >= (r)->num_slots - 1)
+static inline int
+nm_tx_pending(struct netmap_ring *r)
+{
+	return nm_ring_next(r, r->tail) != r->head;
+}
 
+
+static inline uint32_t
+nm_ring_space(struct netmap_ring *ring)
+{
+        int ret = ring->tail - ring->cur;
+        if (ret < 0)
+                ret += ring->num_slots;
+        return ret;
+}
+
+
+#ifdef NETMAP_WITH_LIBS
+/*
+ * Support for simple I/O libraries.
+ * Include other system headers required for compiling this.
+ */
+
+#ifndef HAVE_NETMAP_WITH_LIBS
+#define HAVE_NETMAP_WITH_LIBS
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <string.h>	/* memset */
+#include <sys/ioctl.h>
+#include <sys/errno.h>	/* EINVAL */
+#include <fcntl.h>	/* O_RDWR */
+#include <unistd.h>	/* close() */
+#include <signal.h>
+#include <stdlib.h>
+
+#ifndef ND /* debug macros */
+/* debug support */
+#define ND(_fmt, ...) do {} while(0)
+#define D(_fmt, ...)						\
+	do {							\
+		struct timeval _t0;				\
+		gettimeofday(&_t0, NULL);			\
+		fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n",	\
+		    (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec,	\
+		    __FUNCTION__, __LINE__, ##__VA_ARGS__);	\
+        } while (0)
+
+/* Rate limited version of "D", lps indicates how many per second */
+#define RD(lps, format, ...)                                    \
+    do {                                                        \
+        static int __t0, __cnt;                                 \
+        struct timeval __xxts;                                  \
+        gettimeofday(&__xxts, NULL);                            \
+        if (__t0 != __xxts.tv_sec) {                            \
+            __t0 = __xxts.tv_sec;                               \
+            __cnt = 0;                                          \
+        }                                                       \
+        if (__cnt++ < lps) {                                    \
+            D(format, ##__VA_ARGS__);                           \
+        }                                                       \
+    } while (0)
+#endif
+
+struct nm_pkthdr {	/* same as pcap_pkthdr */
+	struct timeval	ts;
+	uint32_t	caplen;
+	uint32_t	len;
+};
+
+struct nm_stat {	/* same as pcap_stat	*/
+	u_int	ps_recv;
+	u_int	ps_drop;
+	u_int	ps_ifdrop;
+#ifdef WIN32
+	u_int	bs_capt;
+#endif /* WIN32 */
+};
+
+#define NM_ERRBUF_SIZE	512
+
+struct nm_desc {
+	struct nm_desc *self; /* point to self if netmap. */
+	int fd;
+	void *mem;
+	uint32_t memsize;
+	int done_mmap;	/* set if mem is the result of mmap */
+	struct netmap_if * const nifp;
+	uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
+	uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
+	struct nmreq req;	/* also contains the nr_name = ifname */
+	struct nm_pkthdr hdr;
+
+	/*
+	 * The memory contains netmap_if, rings and then buffers.
+	 * Given a pointer (e.g. to nm_inject) we can compare with
+	 * mem/buf_start/buf_end to tell if it is a buffer or
+	 * some other descriptor in our region.
+	 * We also store a pointer to some ring as it helps in the
+	 * translation from buffer indexes to addresses.
+	 */
+	struct netmap_ring * const some_ring;
+	void * const buf_start;
+	void * const buf_end;
+	/* parameters from pcap_open_live */
+	int snaplen;
+	int promisc;
+	int to_ms;
+	char *errbuf;
+
+	/* save flags so we can restore them on close */
+	uint32_t if_flags;
+        uint32_t if_reqcap;
+        uint32_t if_curcap;
+
+	struct nm_stat st;
+	char msg[NM_ERRBUF_SIZE];
+};
+
+/*
+ * when the descriptor is open correctly, d->self == d
+ * Eventually we should also use some magic number.
+ */
+#define P2NMD(p)		((struct nm_desc *)(p))
+#define IS_NETMAP_DESC(d)	((d) && P2NMD(d)->self == P2NMD(d))
+#define NETMAP_FD(d)		(P2NMD(d)->fd)
+
+
+/*
+ * this is a slightly optimized copy routine which rounds
+ * to multiple of 64 bytes and is often faster than dealing
+ * with other odd sizes. We assume there is enough room
+ * in the source and destination buffers.
+ *
+ * XXX only for multiples of 64 bytes, non overlapped.
+ */
+static inline void
+nm_pkt_copy(const void *_src, void *_dst, int l)
+{
+	const uint64_t *src = (const uint64_t *)_src;
+	uint64_t *dst = (uint64_t *)_dst;
+
+	if (unlikely(l >= 1024)) {
+		memcpy(dst, src, l);
+		return;
+	}
+	for (; likely(l > 0); l-=64) {
+		*dst++ = *src++;
+		*dst++ = *src++;
+		*dst++ = *src++;
+		*dst++ = *src++;
+		*dst++ = *src++;
+		*dst++ = *src++;
+		*dst++ = *src++;
+		*dst++ = *src++;
+	}
+}
+
+
+/*
+ * The callback, invoked on each received packet. Same as libpcap
+ */
+typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
+
+/*
+ *--- the pcap-like API ---
+ *
+ * nm_open() opens a file descriptor, binds to a port and maps memory.
+ *
+ * ifname	(netmap:foo or vale:foo) is the port name
+ *		a suffix can indicate the following:
+ *		^		bind the host (sw) ring pair
+ *		*		bind host and NIC ring pairs (transparent)
+ *		-NN		bind individual NIC ring pair
+ *		{NN		bind master side of pipe NN
+ *		}NN		bind slave side of pipe NN
+ *
+ * req		provides the initial values of nmreq before parsing ifname.
+ *		Remember that the ifname parsing will override the ring
+ *		number in nr_ringid, and part of nr_flags;
+ * flags	special functions, normally 0
+ *		indicates which fields of *arg are significant
+ * arg		special functions, normally NULL
+ *		if passed a netmap_desc with mem != NULL,
+ *		use that memory instead of mmap.
+ */
+
+static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
+	uint64_t flags, const struct nm_desc *arg);
+
+/*
+ * nm_open can import some fields from the parent descriptor.
+ * These flags control which ones.
+ * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
+ * which set the initial value for these flags.
+ * Note that the 16 low bits of the flags are reserved for data
+ * that may go into the nmreq.
+ */
+enum {
+	NM_OPEN_NO_MMAP =	0x040000, /* reuse mmap from parent */
+	NM_OPEN_IFNAME =	0x080000, /* nr_name, nr_ringid, nr_flags */
+	NM_OPEN_ARG1 =		0x100000,
+	NM_OPEN_ARG2 =		0x200000,
+	NM_OPEN_ARG3 =		0x400000,
+	NM_OPEN_RING_CFG =	0x800000, /* tx|rx rings|slots */
+};
+
+
+/*
+ * nm_close()	closes and restores the port to its previous state
+ */
+
+static int nm_close(struct nm_desc *);
+
+/*
+ * nm_inject() is the same as pcap_inject()
+ * nm_dispatch() is the same as pcap_dispatch()
+ * nm_nextpkt() is the same as pcap_next()
+ */
+
+static int nm_inject(struct nm_desc *, const void *, size_t);
+static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
+static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
+
+
+/*
+ * Try to open, return descriptor if successful, NULL otherwise.
+ * An invalid netmap name will return errno = 0;
+ * You can pass a pointer to a pre-filled nm_desc to add special
+ * parameters. Flags is used as follows
+ * NM_OPEN_NO_MMAP	use the memory from arg, only
+ *			if the nr_arg2 (memory block) matches.
+ * NM_OPEN_ARG1		use req.nr_arg1 from arg
+ * NM_OPEN_ARG2		use req.nr_arg2 from arg
+ * NM_OPEN_RING_CFG	use ring config from arg
+ */
+static struct nm_desc *
+nm_open(const char *ifname, const struct nmreq *req,
+	uint64_t new_flags, const struct nm_desc *arg)
+{
+	struct nm_desc *d = NULL;
+	const struct nm_desc *parent = arg;
+	u_int namelen;
+	uint32_t nr_ringid = 0, nr_flags;
+	const char *port = NULL;
+	const char *errmsg = NULL;
+
+	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
+		errno = 0; /* name not recognised, not an error */
+		return NULL;
+	}
+	if (ifname[0] == 'n')
+		ifname += 7;
+	/* scan for a separator */
+	for (port = ifname; *port && !index("-*^{}", *port); port++)
+		;
+	namelen = port - ifname;
+	if (namelen >= sizeof(d->req.nr_name)) {
+		errmsg = "name too long";
+		goto fail;
+	}
+	switch (*port) {
+	default:  /* '\0', no suffix */
+		nr_flags = NR_REG_ALL_NIC;
+		break;
+	case '-': /* one NIC */
+		nr_flags = NR_REG_ONE_NIC;
+		nr_ringid = atoi(port + 1);
+		break;
+	case '*': /* NIC and SW, ignore port */
+		nr_flags = NR_REG_NIC_SW;
+		if (port[1]) {
+			errmsg = "invalid port for nic+sw";
+			goto fail;
+		}
+		break;
+	case '^': /* only sw ring */
+		nr_flags = NR_REG_SW;
+		if (port[1]) {
+			errmsg = "invalid port for sw ring";
+			goto fail;
+		}
+		break;
+	case '{':
+		nr_flags = NR_REG_PIPE_MASTER;
+		nr_ringid = atoi(port + 1);
+		break;
+	case '}':
+		nr_flags = NR_REG_PIPE_SLAVE;
+		nr_ringid = atoi(port + 1);
+		break;
+	}
+
+	if (nr_ringid >= NETMAP_RING_MASK) {
+		errmsg = "invalid ringid";
+		goto fail;
+	}
+
+	d = (struct nm_desc *)calloc(1, sizeof(*d));
+	if (d == NULL) {
+		errmsg = "nm_desc alloc failure";
+		errno = ENOMEM;
+		return NULL;
+	}
+	d->self = d;	/* set this early so nm_close() works */
+	d->fd = open("/dev/netmap", O_RDWR);
+	if (d->fd < 0) {
+		errmsg = "cannot open /dev/netmap";
+		goto fail;
+	}
+
+	if (req)
+		d->req = *req;
+	d->req.nr_version = NETMAP_API;
+	d->req.nr_ringid &= ~NETMAP_RING_MASK;
+
+	/* these fields are overridden by ifname and flags processing */
+	d->req.nr_ringid |= nr_ringid;
+	d->req.nr_flags = nr_flags;
+	memcpy(d->req.nr_name, ifname, namelen);
+	d->req.nr_name[namelen] = '\0';
+	/* optionally import info from parent */
+	if (IS_NETMAP_DESC(parent) && new_flags) {
+		if (new_flags & NM_OPEN_ARG1)
+			D("overriding ARG1 %d", parent->req.nr_arg1);
+		d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
+			parent->req.nr_arg1 : 4;
+		if (new_flags & NM_OPEN_ARG2)
+			D("overriding ARG2 %d", parent->req.nr_arg2);
+		d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
+			parent->req.nr_arg2 : 0;
+		if (new_flags & NM_OPEN_ARG3)
+			D("overriding ARG3 %d", parent->req.nr_arg3);
+		d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
+			parent->req.nr_arg3 : 0;
+		if (new_flags & NM_OPEN_RING_CFG) {
+			D("overriding RING_CFG");
+			d->req.nr_tx_slots = parent->req.nr_tx_slots;
+			d->req.nr_rx_slots = parent->req.nr_rx_slots;
+			d->req.nr_tx_rings = parent->req.nr_tx_rings;
+			d->req.nr_rx_rings = parent->req.nr_rx_rings;
+		}
+		if (new_flags & NM_OPEN_IFNAME) {
+			D("overriding ifname %s ringid 0x%x flags 0x%x",
+				parent->req.nr_name, parent->req.nr_ringid,
+				parent->req.nr_flags);
+			memcpy(d->req.nr_name, parent->req.nr_name,
+				sizeof(d->req.nr_name));
+			d->req.nr_ringid = parent->req.nr_ringid;
+			d->req.nr_flags = parent->req.nr_flags;
+		}
+	}
+	/* add the *XPOLL flags */
+	d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
+
+	if (ioctl(d->fd, NIOCREGIF, &d->req)) {
+		errmsg = "NIOCREGIF failed";
+		goto fail;
+	}
+
+	if (IS_NETMAP_DESC(parent) && parent->mem &&
+	    parent->req.nr_arg2 == d->req.nr_arg2) {
+		/* do not mmap, inherit from parent */
+		d->memsize = parent->memsize;
+		d->mem = parent->mem;
+	} else {
+		/* XXX TODO: check if memsize is too large (or there is overflow) */
+		d->memsize = d->req.nr_memsize;
+		d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+				d->fd, 0);
+		if (d->mem == MAP_FAILED) {
+			errmsg = "mmap failed";
+			goto fail;
+		}
+		d->done_mmap = 1;
+	}
+	{
+		struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
+		struct netmap_ring *r = NETMAP_RXRING(nifp, );
+
+		*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
+		*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
+		*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
+		*(void **)(uintptr_t)&d->buf_end =
+			(char *)d->mem + d->memsize;
+	}
+
+	if (d->req.nr_flags ==  NR_REG_SW) { /* host stack */
+		d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
+		d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
+	} else if (d->req.nr_flags ==  NR_REG_ALL_NIC) { /* only nic */
+		d->first_tx_ring = 0;
+		d->first_rx_ring = 0;
+		d->last_tx_ring = d->req.nr_tx_rings - 1;
+		d->last_rx_ring = d->req.nr_rx_rings - 1;
+	} else if (d->req.nr_flags ==  NR_REG_NIC_SW) {
+		d->first_tx_ring = 0;
+		d->first_rx_ring = 0;
+		d->last_tx_ring = d->req.nr_tx_rings;
+		d->last_rx_ring = d->req.nr_rx_rings;
+	} else if (d->req.nr_flags == NR_REG_ONE_NIC) {
+		/* XXX check validity */
+		d->first_tx_ring = d->last_tx_ring =
+		d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK;
+	} else { /* pipes */
+		d->first_tx_ring = d->last_tx_ring = 0;
+		d->first_rx_ring = d->last_rx_ring = 0;
+	}
+
+#ifdef DEBUG_NETMAP_USER
+    { /* debugging code */
+	int i;
+
+	D("%s tx %d .. %d %d rx %d .. %d %d", ifname,
+		d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings,
+                d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings);
+	for (i = 0; i <= d->req.nr_tx_rings; i++) {
+		struct netmap_ring *r = NETMAP_TXRING(d->nifp, i);
+		D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
+	}
+	for (i = 0; i <= d->req.nr_rx_rings; i++) {
+		struct netmap_ring *r = NETMAP_RXRING(d->nifp, i);
+		D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
+	}
+    }
+#endif /* debugging */
+
+	d->cur_tx_ring = d->first_tx_ring;
+	d->cur_rx_ring = d->first_rx_ring;
+	return d;
+
+fail:
+	nm_close(d);
+	if (errmsg)
+		D("%s %s", errmsg, ifname);
+	if (errno == 0)
+		errno = EINVAL;
+	return NULL;
+}
+
+
+static int
+nm_close(struct nm_desc *d)
+{
+	/*
+	 * ugly trick to avoid unused warnings
+	 */
+	static void *__xxzt[] __attribute__ ((unused))  =
+		{ (void *)nm_open, (void *)nm_inject,
+		  (void *)nm_dispatch, (void *)nm_nextpkt } ;
+
+	if (d == NULL || d->self != d)
+		return EINVAL;
+	if (d->done_mmap && d->mem)
+		munmap(d->mem, d->memsize);
+	if (d->fd != -1)
+		close(d->fd);
+	bzero(d, sizeof(*d));
+	free(d);
+	return 0;
+}
+
+
+/*
+ * Same prototype as pcap_inject(), only need to cast.
+ */
+static int
+nm_inject(struct nm_desc *d, const void *buf, size_t size)
+{
+	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
+
+	for (c = 0; c < n ; c++) {
+		/* compute current ring to use */
+		struct netmap_ring *ring;
+		uint32_t i, idx;
+		uint32_t ri = d->cur_tx_ring + c;
+
+		if (ri > d->last_tx_ring)
+			ri = d->first_tx_ring;
+		ring = NETMAP_TXRING(d->nifp, ri);
+		if (nm_ring_empty(ring)) {
+			continue;
+		}
+		i = ring->cur;
+		idx = ring->slot[i].buf_idx;
+		ring->slot[i].len = size;
+		nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
+		d->cur_tx_ring = ri;
+		ring->head = ring->cur = nm_ring_next(ring, i);
+		return size;
+	}
+	return 0; /* fail */
+}
+
+
+/*
+ * Same prototype as pcap_dispatch(), only need to cast.
+ */
+static int
+nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
+{
+	int n = d->last_rx_ring - d->first_rx_ring + 1;
+	int c, got = 0, ri = d->cur_rx_ring;
+
+	if (cnt == 0)
+		cnt = -1;
+	/* cnt == -1 means infinite, but rings have a finite amount
+	 * of buffers and the int is large enough that we never wrap,
+	 * so we can omit checking for -1
+	 */
+	for (c=0; c < n && cnt != got; c++) {
+		/* compute current ring to use */
+		struct netmap_ring *ring;
+
+		ri = d->cur_rx_ring + c;
+		if (ri > d->last_rx_ring)
+			ri = d->first_rx_ring;
+		ring = NETMAP_RXRING(d->nifp, ri);
+		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
+			u_int i = ring->cur;
+			u_int idx = ring->slot[i].buf_idx;
+			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
+
+			// __builtin_prefetch(buf);
+			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
+			d->hdr.ts = ring->ts;
+			cb(arg, &d->hdr, buf);
+			ring->head = ring->cur = nm_ring_next(ring, i);
+		}
+	}
+	d->cur_rx_ring = ri;
+	return got;
+}
+
+static u_char *
+nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr)
+{
+	int ri = d->cur_rx_ring;
+
+	do {
+		/* compute current ring to use */
+		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
+		if (!nm_ring_empty(ring)) {
+			u_int i = ring->cur;
+			u_int idx = ring->slot[i].buf_idx;
+			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
+
+			// __builtin_prefetch(buf);
+			hdr->ts = ring->ts;
+			hdr->len = hdr->caplen = ring->slot[i].len;
+			ring->cur = nm_ring_next(ring, i);
+			/* we could postpone advancing head if we want
+			 * to hold the buffer. This can be supported in
+			 * the future.
+			 */
+			ring->head = ring->cur;
+			d->cur_rx_ring = ri;
+			return buf;
+		}
+		ri++;
+		if (ri > d->last_rx_ring)
+			ri = d->first_rx_ring;
+	} while (ri != d->cur_rx_ring);
+	return NULL; /* nothing found */
+}
+
+#endif /* !HAVE_NETMAP_WITH_LIBS */
+
+#endif /* NETMAP_WITH_LIBS */
+
 #endif /* _NET_NETMAP_USER_H_ */

Added: trunk/sys/net/paravirt.h
===================================================================
--- trunk/sys/net/paravirt.h	                        (rev 0)
+++ trunk/sys/net/paravirt.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,158 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NET_PARAVIRT_H
+#define NET_PARAVIRT_H
+
+ /*
+  * $FreeBSD: stable/10/sys/net/paravirt.h 289385 2015-10-15 20:36:04Z adrian $
+  *
+ Support for virtio-like communication between host (H) and guest (G) NICs.
+
+ THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
+
+ The guest allocates the shared Communication Status Block (csb) and
+ write its physical address at CSBAL and CSBAH (data is little endian).
+ csb->csb_on enables the mode. If disabled, the device acts a regular one.
+
+ Notifications for tx and rx are exchanged without vm exits
+ if possible. In particular (only mentioning csb mode below),
+ the following actions are performed. In the description below,
+ "double check" means verifying again the condition that caused
+ the previous action, and reverting the action if the condition has
+ changed. The condition typically depends on a variable set by the
+ other party, and the double check is done to avoid races. E.g.
+
+	// start with A=0
+    again:
+	// do something
+	if ( cond(C) ) { // C is written by the other side
+	    A = 1;
+	    // barrier
+	    if ( !cond(C) ) {
+		A = 0;
+		goto again;
+	    }
+	}
+
+ TX: start from idle:
+    H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
+    transmissions, G always updates guest_tdt.  If host_need_txkick == 1,
+    G also writes to the TDT, which acts as a kick to H (so pending
+    writes are always dispatched to H as soon as possible.)
+
+ TX: active state:
+    On the kick (TDT write) H sets host_need_txkick == 0 (if not
+    done already by G), and starts an I/O thread trying to consume
+    packets from TDH to guest_tdt, periodically refreshing host_tdh
+    and TDH.  When host_tdh == guest_tdt, H sets host_need_txkick=1,
+    and then does the "double check" for race avoidance.
+
+ TX: G runs out of buffers
+    XXX there are two mechanisms, one boolean (using guest_need_txkick)
+    and one with a threshold (using guest_txkick_at). They are mutually
+    exclusive.
+    BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
+        the double check. If H finds guest_need_txkick== 1 on a write
+        to TDH, it also generates an interrupt.
+    THRESHOLD: G sets guest_txkick_at to the TDH value for which it
+	wants to receive an interrupt. When H detects that TDH moves
+	across guest_txkick_at, it generates an interrupt.
+	This second mechanism reduces the number of interrupts and
+	TDT writes on the transmit side when the host is too slow.
+
+ RX: start from idle
+    G starts with guest_need_rxkick = 1 when the receive ring is empty.
+    As packets arrive, H updates host_rdh (and RDH) and also generates an
+    interrupt when guest_need_rxkick == 1 (so incoming packets are
+    always reported to G as soon as possible, apart from interrupt
+    moderation delays). It also tracks guest_rdt for new buffers.
+
+ RX: active state
+    As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
+    draining packets from the receive ring, while updating guest_rdt
+    When G runs out of packets it sets guest_need_rxkick=1 and does the
+    double check.
+
+ RX: H runs out of buffers
+    XXX there are two mechanisms, one boolean (using host_need_rxkick)
+    and one with a threshold (using host_xxkick_at). They are mutually
+    exclusive.
+    BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
+	double check. If G finds host_need_rxkick==1 on updating guest_rdt,
+        it also writes to RDT causing a kick to H.
+    THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
+	to receive a kick. When G detects that guest_rdt moves across
+	host_rxkick_at, it writes to RDT thus generates a kick.
+	This second mechanism reduces the number of kicks and
+        RDT writes on the receive side when the guest is too slow and
+	would free only a few buffers at a time.
+
+ */
+struct paravirt_csb {
+    /* XXX revise the layout to minimize cache bounces.
+     * Usage is described as follows:
+     * 	[GH][RW][+-0]	guest/host reads/writes frequently/rarely/almost never
+     */
+    /* these are (mostly) written by the guest */
+    uint32_t guest_tdt;            /* GW+ HR+ pkt to transmit */
+    uint32_t guest_need_txkick;    /* GW- HR+ G ran out of tx bufs, request kick */
+    uint32_t guest_need_rxkick;    /* GW- HR+ G ran out of rx pkts, request kick  */
+    uint32_t guest_csb_on;         /* GW- HR+ enable paravirtual mode */
+    uint32_t guest_rdt;            /* GW+ HR+ rx buffers available */
+    uint32_t guest_txkick_at;      /* GW- HR+ tx ring pos. where G expects an intr */
+    uint32_t guest_use_msix;        /* GW0 HR0 guest uses MSI-X interrupts. */
+    uint32_t pad[9];
+
+    /* these are (mostly) written by the host */
+    uint32_t host_tdh;             /* GR0 HW- shadow register, mostly unused */
+    uint32_t host_need_txkick;     /* GR+ HW- start the iothread */
+    uint32_t host_txcycles_lim;    /* GW- HR- how much to spin before  sleep.
+				    * set by the guest */
+    uint32_t host_txcycles;        /* GR0 HW- counter, but no need to be exported */
+    uint32_t host_rdh;             /* GR0 HW- shadow register, mostly unused */
+    uint32_t host_need_rxkick;     /* GR+ HW- flush rx queued packets */
+    uint32_t host_isr;             /* GR* HW* shadow copy of ISR */
+    uint32_t host_rxkick_at;       /* GR+ HW- rx ring pos where H expects a kick */
+    uint32_t vnet_ring_high;	/* Vnet ring physical address high. */
+    uint32_t vnet_ring_low;	/* Vnet ring physical address low. */
+};
+
+#define NET_PARAVIRT_CSB_SIZE   4096
+#define NET_PARAVIRT_NONE   (~((uint32_t)0))
+
+#ifdef	QEMU_PCI_H
+
+/*
+ * API functions only available within QEMU
+ */
+
+void paravirt_configure_csb(struct paravirt_csb** csb, uint32_t csbbal,
+			uint32_t csbbah, QEMUBH* tx_bh, AddressSpace *as);
+
+#endif /* QEMU_PCI_H */
+
+#endif /* NET_PARAVIRT_H */


Property changes on: trunk/sys/net/paravirt.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/pfil.c
===================================================================
--- trunk/sys/net/pfil.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/pfil.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/net/pfil.c 198233 2009-10-19 15:19:14Z rwatson $ */
+/*	$FreeBSD: stable/10/sys/net/pfil.c 254774 2013-08-24 11:24:15Z andre $ */
 /*	$NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $	*/
 
 /*-
@@ -53,18 +53,18 @@
 MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
   MTX_DEF);
 
-static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
+static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
+static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
+static int pfil_chain_remove(pfil_chain_t *, pfil_func_t, void *);
 
-static int pfil_list_remove(pfil_list_t *,
-    int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
-    void *);
-
 LIST_HEAD(pfilheadhead, pfil_head);
 VNET_DEFINE(struct pfilheadhead, pfil_head_list);
 #define	V_pfil_head_list	VNET(pfil_head_list)
+VNET_DEFINE(struct rmlock, pfil_lock);
+#define	V_pfil_lock	VNET(pfil_lock)
 
 /*
- * pfil_run_hooks() runs the specified packet filter hooks.
+ * pfil_run_hooks() runs the specified packet filter hook chain.
  */
 int
 pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
@@ -77,8 +77,8 @@
 
 	PFIL_RLOCK(ph, &rmpt);
 	KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
-	for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
-	     pfh = TAILQ_NEXT(pfh, pfil_link)) {
+	for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
+	     pfh = TAILQ_NEXT(pfh, pfil_chain)) {
 		if (pfh->pfil_func != NULL) {
 			rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
 			    inp);
@@ -91,7 +91,81 @@
 	return (rv);
 }
 
+static struct packet_filter_hook *
+pfil_chain_get(int dir, struct pfil_head *ph)
+{
+
+	if (dir == PFIL_IN)
+		return (TAILQ_FIRST(&ph->ph_in));
+	else if (dir == PFIL_OUT)
+		return (TAILQ_FIRST(&ph->ph_out));
+	else
+		return (NULL);
+}
+
 /*
+ * pfil_try_rlock() acquires rm reader lock for specified head
+ * if this is immediately possible.
+ */
+int
+pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+	return (PFIL_TRY_RLOCK(ph, tracker));
+}
+
+/*
+ * pfil_rlock() acquires rm reader lock for specified head.
+ */
+void
+pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+	PFIL_RLOCK(ph, tracker);
+}
+
+/*
+ * pfil_runlock() releases reader lock for specified head.
+ */
+void
+pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+	PFIL_RUNLOCK(ph, tracker);
+}
+
+/*
+ * pfil_wlock() acquires writer lock for specified head.
+ */
+void
+pfil_wlock(struct pfil_head *ph)
+{
+
+	PFIL_WLOCK(ph);
+}
+
+/*
+ * pfil_wunlock() releases writer lock for specified head.
+ */
+void
+pfil_wunlock(struct pfil_head *ph)
+{
+
+	PFIL_WUNLOCK(ph);
+}
+
+/*
+ * pfil_wowned() returns a non-zero value if the current thread owns
+ * an exclusive lock.
+ */
+int
+pfil_wowned(struct pfil_head *ph)
+{
+
+	return (PFIL_WOWNED(ph));
+}
+
+/*
  * pfil_head_register() registers a pfil_head with the packet filter hook
  * mechanism.
  */
@@ -100,11 +174,11 @@
 {
 	struct pfil_head *lph;
 
-	PFIL_LIST_LOCK();
+	PFIL_HEADLIST_LOCK();
 	LIST_FOREACH(lph, &V_pfil_head_list, ph_list) {
 		if (ph->ph_type == lph->ph_type &&
 		    ph->ph_un.phu_val == lph->ph_un.phu_val) {
-			PFIL_LIST_UNLOCK();
+			PFIL_HEADLIST_UNLOCK();
 			return (EEXIST);
 		}
 	}
@@ -113,7 +187,7 @@
 	TAILQ_INIT(&ph->ph_in);
 	TAILQ_INIT(&ph->ph_out);
 	LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
-	PFIL_LIST_UNLOCK();
+	PFIL_HEADLIST_UNLOCK();
 	return (0);
 }
 
@@ -127,12 +201,12 @@
 {
 	struct packet_filter_hook *pfh, *pfnext;
 		
-	PFIL_LIST_LOCK();
+	PFIL_HEADLIST_LOCK();
 	LIST_REMOVE(ph, ph_list);
-	PFIL_LIST_UNLOCK();
-	TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext)
+	PFIL_HEADLIST_UNLOCK();
+	TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext)
 		free(pfh, M_IFADDR);
-	TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext)
+	TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext)
 		free(pfh, M_IFADDR);
 	PFIL_LOCK_DESTROY(ph);
 	return (0);
@@ -146,11 +220,11 @@
 {
 	struct pfil_head *ph;
 
-	PFIL_LIST_LOCK();
+	PFIL_HEADLIST_LOCK();
 	LIST_FOREACH(ph, &V_pfil_head_list, ph_list)
 		if (ph->ph_type == type && ph->ph_un.phu_val == val)
 			break;
-	PFIL_LIST_UNLOCK();
+	PFIL_HEADLIST_UNLOCK();
 	return (ph);
 }
 
@@ -163,8 +237,7 @@
  *	PFIL_WAITOK	OK to call malloc with M_WAITOK.
  */
 int
-pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
-  struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
 {
 	struct packet_filter_hook *pfh1 = NULL;
 	struct packet_filter_hook *pfh2 = NULL;
@@ -190,7 +263,7 @@
 	if (flags & PFIL_IN) {
 		pfh1->pfil_func = func;
 		pfh1->pfil_arg = arg;
-		err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
+		err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
 		if (err)
 			goto locked_error;
 		ph->ph_nhooks++;
@@ -198,10 +271,10 @@
 	if (flags & PFIL_OUT) {
 		pfh2->pfil_func = func;
 		pfh2->pfil_arg = arg;
-		err = pfil_list_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
+		err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
 		if (err) {
 			if (flags & PFIL_IN)
-				pfil_list_remove(&ph->ph_in, func, arg);
+				pfil_chain_remove(&ph->ph_in, func, arg);
 			goto locked_error;
 		}
 		ph->ph_nhooks++;
@@ -220,22 +293,21 @@
 
 /*
  * pfil_remove_hook removes a specific function from the packet filter hook
- * list.
+ * chain.
  */
 int
-pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
-    struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
 {
 	int err = 0;
 
 	PFIL_WLOCK(ph);
 	if (flags & PFIL_IN) {
-		err = pfil_list_remove(&ph->ph_in, func, arg);
+		err = pfil_chain_remove(&ph->ph_in, func, arg);
 		if (err == 0)
 			ph->ph_nhooks--;
 	}
 	if ((err == 0) && (flags & PFIL_OUT)) {
-		err = pfil_list_remove(&ph->ph_out, func, arg);
+		err = pfil_chain_remove(&ph->ph_out, func, arg);
 		if (err == 0)
 			ph->ph_nhooks--;
 	}
@@ -243,8 +315,11 @@
 	return (err);
 }
 
+/*
+ * Internal: Add a new pfil hook into a hook chain.
+ */
 static int
-pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
+pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags)
 {
 	struct packet_filter_hook *pfh;
 
@@ -251,7 +326,7 @@
 	/*
 	 * First make sure the hook is not already there.
 	 */
-	TAILQ_FOREACH(pfh, list, pfil_link)
+	TAILQ_FOREACH(pfh, chain, pfil_chain)
 		if (pfh->pfil_func == pfh1->pfil_func &&
 		    pfh->pfil_arg == pfh1->pfil_arg)
 			return (EEXIST);
@@ -261,26 +336,23 @@
 	 * the same path is followed in or out of the kernel.
 	 */
 	if (flags & PFIL_IN)
-		TAILQ_INSERT_HEAD(list, pfh1, pfil_link);
+		TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain);
 	else
-		TAILQ_INSERT_TAIL(list, pfh1, pfil_link);
+		TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain);
 	return (0);
 }
 
 /*
- * pfil_list_remove is an internal function that takes a function off the
- * specified list.
+ * Internal: Remove a pfil hook from a hook chain.
  */
 static int
-pfil_list_remove(pfil_list_t *list,
-    int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
-    void *arg)
+pfil_chain_remove(pfil_chain_t *chain, pfil_func_t func, void *arg)
 {
 	struct packet_filter_hook *pfh;
 
-	TAILQ_FOREACH(pfh, list, pfil_link)
+	TAILQ_FOREACH(pfh, chain, pfil_chain)
 		if (pfh->pfil_func == func && pfh->pfil_arg == arg) {
-			TAILQ_REMOVE(list, pfh, pfil_link);
+			TAILQ_REMOVE(chain, pfh, pfil_chain);
 			free(pfh, M_IFADDR);
 			return (0);
 		}
@@ -296,6 +368,7 @@
 {
 
 	LIST_INIT(&V_pfil_head_list);
+	PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared");
 	return (0);
 }
 
@@ -306,7 +379,9 @@
 vnet_pfil_uninit(const void *unused)
 {
 
-	/*  XXX should panic if list is not empty */
+	KASSERT(LIST_EMPTY(&V_pfil_head_list),
+	    ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list));
+	PFIL_LOCK_DESTROY_REAL(&V_pfil_lock);
 	return (0);
 }
 

Modified: trunk/sys/net/pfil.h
===================================================================
--- trunk/sys/net/pfil.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/pfil.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/net/pfil.h 210121 2010-07-15 14:41:06Z luigi $ */
+/*	$FreeBSD: stable/10/sys/net/pfil.h 254777 2013-08-24 12:03:24Z andre $ */
 /*	$NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $	*/
 
 /*-
@@ -44,15 +44,18 @@
 struct ifnet;
 struct inpcb;
 
+typedef	int	(*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int,
+		    struct inpcb *);
+
 /*
  * The packet filter hooks are designed for anything to call them to
- * possibly intercept the packet.
+ * possibly intercept the packet.  Multiple filter hooks are chained
+ * together and run one after another in the specified order.
  */
 struct packet_filter_hook {
-        TAILQ_ENTRY(packet_filter_hook) pfil_link;
-	int	(*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
-		    struct inpcb *);
-	void	*pfil_arg;
+	TAILQ_ENTRY(packet_filter_hook) pfil_chain;
+	pfil_func_t	 pfil_func;
+	void		*pfil_arg;
 };
 
 #define PFIL_IN		0x00000001
@@ -60,63 +63,87 @@
 #define PFIL_WAITOK	0x00000004
 #define PFIL_ALL	(PFIL_IN|PFIL_OUT)
 
-typedef	TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;
+typedef	TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t;
 
 #define	PFIL_TYPE_AF		1	/* key is AF_* type */
 #define	PFIL_TYPE_IFNET		2	/* key is ifnet pointer */
 
+#define	PFIL_FLAG_PRIVATE_LOCK	0x01	/* Personal lock instead of global */
+
+/*
+ * A pfil head is created by each protocol or packet intercept point.
+ * Each packet is then run through the hook chain for inspection.
+ */
 struct pfil_head {
-	pfil_list_t	ph_in;
-	pfil_list_t	ph_out;
-	int		ph_type;
-	int		ph_nhooks;
+	pfil_chain_t	 ph_in;
+	pfil_chain_t	 ph_out;
+	int		 ph_type;
+	int		 ph_nhooks;
 #if defined( __linux__ ) || defined( _WIN32 )
-	rwlock_t	ph_mtx;
+	rwlock_t	 ph_mtx;
 #else
-	struct rmlock	ph_lock;
+	struct rmlock	*ph_plock;	/* Pointer to the used lock */
+	struct rmlock	 ph_lock;	/* Private lock storage */
+	int		 flags;
 #endif
 	union {
-		u_long		phu_val;
-		void		*phu_ptr;
+		u_long	 phu_val;
+		void	*phu_ptr;
 	} ph_un;
-#define	ph_af		ph_un.phu_val
-#define	ph_ifnet	ph_un.phu_ptr
+#define	ph_af		 ph_un.phu_val
+#define	ph_ifnet	 ph_un.phu_ptr
 	LIST_ENTRY(pfil_head) ph_list;
 };
 
-int	pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
-	    int, struct inpcb *), void *, int, struct pfil_head *);
-int	pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
-	    int, struct inpcb *), void *, int, struct pfil_head *);
+/* Public functions for pfil hook management by packet filters. */
+struct pfil_head *pfil_head_get(int, u_long);
+int	pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
+int	pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
+#define	PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
+
+/* Public functions to run the packet inspection by protocols. */
 int	pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
 	    int, struct inpcb *inp);
 
+/* Public functions for pfil head management by protocols. */
 int	pfil_head_register(struct pfil_head *);
 int	pfil_head_unregister(struct pfil_head *);
 
-struct pfil_head *pfil_head_get(int, u_long);
+/* Public pfil locking functions for self managed locks by packet filters. */
+struct rm_priotracker;	/* Do not require including rmlock header */
+int	pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
+void	pfil_rlock(struct pfil_head *, struct rm_priotracker *);
+void	pfil_runlock(struct pfil_head *, struct rm_priotracker *);
+void	pfil_wlock(struct pfil_head *);
+void	pfil_wunlock(struct pfil_head *);
+int	pfil_wowned(struct pfil_head *ph);
 
-#define	PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
-#define	PFIL_LOCK_INIT(p) \
-    rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE)
-#define	PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock)
-#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t))
-#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock)
-#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t))
-#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock)
-#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
+/* Internal pfil locking functions. */
+#define	PFIL_LOCK_INIT_REAL(l, t)	\
+	rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE)
+#define	PFIL_LOCK_DESTROY_REAL(l)	\
+	rm_destroy(l)
+#define	PFIL_LOCK_INIT(p)	do {			\
+	if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) {	\
+		PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private");	\
+		(p)->ph_plock = &(p)->ph_lock;		\
+	} else						\
+		(p)->ph_plock = &V_pfil_lock;		\
+} while (0)
+#define	PFIL_LOCK_DESTROY(p)	do {			\
+	if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK)	\
+		PFIL_LOCK_DESTROY_REAL((p)->ph_plock);	\
+} while (0)
 
-static __inline struct packet_filter_hook *
-pfil_hook_get(int dir, struct pfil_head *ph)
-{
+#define	PFIL_TRY_RLOCK(p, t)	rm_try_rlock((p)->ph_plock, (t))
+#define	PFIL_RLOCK(p, t)	rm_rlock((p)->ph_plock, (t))
+#define	PFIL_WLOCK(p)		rm_wlock((p)->ph_plock)
+#define	PFIL_RUNLOCK(p, t)	rm_runlock((p)->ph_plock, (t))
+#define	PFIL_WUNLOCK(p)		rm_wunlock((p)->ph_plock)
+#define	PFIL_WOWNED(p)		rm_wowned((p)->ph_plock)
 
-	if (dir == PFIL_IN)
-		return (TAILQ_FIRST(&ph->ph_in));
-	else if (dir == PFIL_OUT)
-		return (TAILQ_FIRST(&ph->ph_out));
-	else
-		return (NULL);
-}
+/* Internal locking macros for global/vnet pfil_head_list. */
+#define	PFIL_HEADLIST_LOCK()	mtx_lock(&pfil_global_lock)
+#define	PFIL_HEADLIST_UNLOCK()	mtx_unlock(&pfil_global_lock)
 
 #endif /* _NET_PFIL_H_ */

Modified: trunk/sys/net/pfkeyv2.h
===================================================================
--- trunk/sys/net/pfkeyv2.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/pfkeyv2.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/net/pfkeyv2.h 194062 2009-06-12 15:44:35Z vanhu $	*/
+/*	$FreeBSD: stable/10/sys/net/pfkeyv2.h 194062 2009-06-12 15:44:35Z vanhu $	*/
 /*	$KAME: pfkeyv2.h,v 1.37 2003/09/06 05:15:43 itojun Exp $	*/
 
 /*-

Added: trunk/sys/net/pfvar.h
===================================================================
--- trunk/sys/net/pfvar.h	                        (rev 0)
+++ trunk/sys/net/pfvar.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,1752 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    - Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    - Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *	$OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ *	$FreeBSD: stable/10/sys/net/pfvar.h 332494 2018-04-13 22:33:18Z kp $
+ */
+
+#ifndef _NET_PFVAR_H_
+#define _NET_PFVAR_H_
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/counter.h>
+#include <sys/refcount.h>
+#include <sys/tree.h>
+
+#include <net/radix.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+
+struct pf_addr {
+	union {
+		struct in_addr		v4;
+		struct in6_addr		v6;
+		u_int8_t		addr8[16];
+		u_int16_t		addr16[8];
+		u_int32_t		addr32[4];
+	} pfa;		    /* 128-bit address */
+#define v4	pfa.v4
+#define v6	pfa.v6
+#define addr8	pfa.addr8
+#define addr16	pfa.addr16
+#define addr32	pfa.addr32
+};
+
+#define PFI_AFLAG_NETWORK	0x01
+#define PFI_AFLAG_BROADCAST	0x02
+#define PFI_AFLAG_PEER		0x04
+#define PFI_AFLAG_MODEMASK	0x07
+#define PFI_AFLAG_NOALIAS	0x08
+
+struct pf_addr_wrap {
+	union {
+		struct {
+			struct pf_addr		 addr;
+			struct pf_addr		 mask;
+		}			 a;
+		char			 ifname[IFNAMSIZ];
+		char			 tblname[PF_TABLE_NAME_SIZE];
+	}			 v;
+	union {
+		struct pfi_dynaddr	*dyn;
+		struct pfr_ktable	*tbl;
+		int			 dyncnt;
+		int			 tblcnt;
+	}			 p;
+	u_int8_t		 type;		/* PF_ADDR_* */
+	u_int8_t		 iflags;	/* PFI_AFLAG_* */
+};
+
+#ifdef _KERNEL
+
+struct pfi_dynaddr {
+	TAILQ_ENTRY(pfi_dynaddr)	 entry;
+	struct pf_addr			 pfid_addr4;
+	struct pf_addr			 pfid_mask4;
+	struct pf_addr			 pfid_addr6;
+	struct pf_addr			 pfid_mask6;
+	struct pfr_ktable		*pfid_kt;
+	struct pfi_kif			*pfid_kif;
+	int				 pfid_net;	/* mask or 128 */
+	int				 pfid_acnt4;	/* address count IPv4 */
+	int				 pfid_acnt6;	/* address count IPv6 */
+	sa_family_t			 pfid_af;	/* rule af */
+	u_int8_t			 pfid_iflags;	/* PFI_AFLAG_* */
+};
+
+/*
+ * Address manipulation macros
+ */
+#define	HTONL(x)	(x) = htonl((__uint32_t)(x))
+#define	HTONS(x)	(x) = htons((__uint16_t)(x))
+#define	NTOHL(x)	(x) = ntohl((__uint32_t)(x))
+#define	NTOHS(x)	(x) = ntohs((__uint16_t)(x))
+
+#define	PF_NAME		"pf"
+
+#define	PF_HASHROW_ASSERT(h)	mtx_assert(&(h)->lock, MA_OWNED)
+#define	PF_HASHROW_LOCK(h)	mtx_lock(&(h)->lock)
+#define	PF_HASHROW_UNLOCK(h)	mtx_unlock(&(h)->lock)
+
+#define	PF_STATE_LOCK(s)						\
+	do {								\
+		struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)];	\
+		PF_HASHROW_LOCK(_ih);					\
+	} while (0)
+
+#define	PF_STATE_UNLOCK(s)						\
+	do {								\
+		struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))];	\
+		PF_HASHROW_UNLOCK(_ih);					\
+	} while (0)
+
+#ifdef INVARIANTS
+#define	PF_STATE_LOCK_ASSERT(s)						\
+	do {								\
+		struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)];	\
+		PF_HASHROW_ASSERT(_ih);					\
+	} while (0)
+#else /* !INVARIANTS */
+#define	PF_STATE_LOCK_ASSERT(s)		do {} while (0)
+#endif /* INVARIANTS */
+
+extern struct mtx pf_unlnkdrules_mtx;
+#define	PF_UNLNKDRULES_LOCK()	mtx_lock(&pf_unlnkdrules_mtx)
+#define	PF_UNLNKDRULES_UNLOCK()	mtx_unlock(&pf_unlnkdrules_mtx)
+
+extern struct rwlock pf_rules_lock;
+#define	PF_RULES_RLOCK()	rw_rlock(&pf_rules_lock)
+#define	PF_RULES_RUNLOCK()	rw_runlock(&pf_rules_lock)
+#define	PF_RULES_WLOCK()	rw_wlock(&pf_rules_lock)
+#define	PF_RULES_WUNLOCK()	rw_wunlock(&pf_rules_lock)
+#define	PF_RULES_ASSERT()	rw_assert(&pf_rules_lock, RA_LOCKED)
+#define	PF_RULES_RASSERT()	rw_assert(&pf_rules_lock, RA_RLOCKED)
+#define	PF_RULES_WASSERT()	rw_assert(&pf_rules_lock, RA_WLOCKED)
+
+#define	PF_MODVER	1
+#define	PFLOG_MODVER	1
+#define	PFSYNC_MODVER	1
+
+#define	PFLOG_MINVER	1
+#define	PFLOG_PREFVER	PFLOG_MODVER
+#define	PFLOG_MAXVER	1
+#define	PFSYNC_MINVER	1
+#define	PFSYNC_PREFVER	PFSYNC_MODVER
+#define	PFSYNC_MAXVER	1
+
+#ifdef INET
+#ifndef INET6
+#define	PF_INET_ONLY
+#endif /* ! INET6 */
+#endif /* INET */
+
+#ifdef INET6
+#ifndef INET
+#define	PF_INET6_ONLY
+#endif /* ! INET */
+#endif /* INET6 */
+
+#ifdef INET
+#ifdef INET6
+#define	PF_INET_INET6
+#endif /* INET6 */
+#endif /* INET */
+
+#else
+
+#define	PF_INET_INET6
+
+#endif /* _KERNEL */
+
+/* Both IPv4 and IPv6 */
+#ifdef PF_INET_INET6
+
+#define PF_AEQ(a, b, c) \
+	((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \
+	(c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \
+	(a)->addr32[2] == (b)->addr32[2] && \
+	(a)->addr32[1] == (b)->addr32[1] && \
+	(a)->addr32[0] == (b)->addr32[0])) \
+
+#define PF_ANEQ(a, b, c) \
+	((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \
+	(c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \
+	(a)->addr32[1] != (b)->addr32[1] || \
+	(a)->addr32[2] != (b)->addr32[2] || \
+	(a)->addr32[3] != (b)->addr32[3]))) \
+
+#define PF_AZERO(a, c) \
+	((c == AF_INET && !(a)->addr32[0]) || \
+	(c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \
+	!(a)->addr32[2] && !(a)->addr32[3] )) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+	pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+	pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+	pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+	pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv6 */
+
+#ifdef PF_INET6_ONLY
+
+#define PF_AEQ(a, b, c) \
+	((a)->addr32[3] == (b)->addr32[3] && \
+	(a)->addr32[2] == (b)->addr32[2] && \
+	(a)->addr32[1] == (b)->addr32[1] && \
+	(a)->addr32[0] == (b)->addr32[0]) \
+
+#define PF_ANEQ(a, b, c) \
+	((a)->addr32[3] != (b)->addr32[3] || \
+	(a)->addr32[2] != (b)->addr32[2] || \
+	(a)->addr32[1] != (b)->addr32[1] || \
+	(a)->addr32[0] != (b)->addr32[0]) \
+
+#define PF_AZERO(a, c) \
+	(!(a)->addr32[0] && \
+	!(a)->addr32[1] && \
+	!(a)->addr32[2] && \
+	!(a)->addr32[3] ) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+	pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+	pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+	pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+	pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv4 */
+#ifdef PF_INET_ONLY
+
+#define PF_AEQ(a, b, c) \
+	((a)->addr32[0] == (b)->addr32[0])
+
+#define PF_ANEQ(a, b, c) \
+	((a)->addr32[0] != (b)->addr32[0])
+
+#define PF_AZERO(a, c) \
+	(!(a)->addr32[0])
+
+#define PF_MATCHA(n, a, m, b, f) \
+	pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+	(a)->v4.s_addr = (b)->v4.s_addr
+
+#define PF_AINC(a, f) \
+	do { \
+		(a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \
+	} while (0)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+	do { \
+		(a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \
+		(((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \
+	} while (0)
+
+#endif /* PF_INET_ONLY */
+#endif /* PF_INET6_ONLY */
+#endif /* PF_INET_INET6 */
+
+/*
+ * XXX callers not FIB-aware in our version of pf yet.
+ * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio.
+ */
+#define	PF_MISMATCHAW(aw, x, af, neg, ifp, rtid)			\
+	(								\
+		(((aw)->type == PF_ADDR_NOROUTE &&			\
+		    pf_routable((x), (af), NULL, (rtid))) ||		\
+		(((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL &&	\
+		    pf_routable((x), (af), (ifp), (rtid))) ||		\
+		((aw)->type == PF_ADDR_TABLE &&				\
+		    !pfr_match_addr((aw)->p.tbl, (x), (af))) ||		\
+		((aw)->type == PF_ADDR_DYNIFTL &&			\
+		    !pfi_match_addr((aw)->p.dyn, (x), (af))) ||		\
+		((aw)->type == PF_ADDR_RANGE &&				\
+		    !pf_match_addr_range(&(aw)->v.a.addr,		\
+		    &(aw)->v.a.mask, (x), (af))) ||			\
+		((aw)->type == PF_ADDR_ADDRMASK &&			\
+		    !PF_AZERO(&(aw)->v.a.mask, (af)) &&			\
+		    !PF_MATCHA(0, &(aw)->v.a.addr,			\
+		    &(aw)->v.a.mask, (x), (af))))) !=			\
+		(neg)							\
+	)
+
+
+struct pf_rule_uid {
+	uid_t		 uid[2];
+	u_int8_t	 op;
+};
+
+struct pf_rule_gid {
+	uid_t		 gid[2];
+	u_int8_t	 op;
+};
+
+struct pf_rule_addr {
+	struct pf_addr_wrap	 addr;
+	u_int16_t		 port[2];
+	u_int8_t		 neg;
+	u_int8_t		 port_op;
+};
+
+struct pf_pooladdr {
+	struct pf_addr_wrap		 addr;
+	TAILQ_ENTRY(pf_pooladdr)	 entries;
+	char				 ifname[IFNAMSIZ];
+	struct pfi_kif			*kif;
+};
+
+TAILQ_HEAD(pf_palist, pf_pooladdr);
+
+struct pf_poolhashkey {
+	union {
+		u_int8_t		key8[16];
+		u_int16_t		key16[8];
+		u_int32_t		key32[4];
+	} pfk;		    /* 128-bit hash key */
+#define key8	pfk.key8
+#define key16	pfk.key16
+#define key32	pfk.key32
+};
+
+struct pf_pool {
+	struct pf_palist	 list;
+	struct pf_pooladdr	*cur;
+	struct pf_poolhashkey	 key;
+	struct pf_addr		 counter;
+	int			 tblidx;
+	u_int16_t		 proxy_port[2];
+	u_int8_t		 opts;
+};
+
+
+/* A packed Operating System description for fingerprinting */
+typedef u_int32_t pf_osfp_t;
+#define PF_OSFP_ANY	((pf_osfp_t)0)
+#define PF_OSFP_UNKNOWN	((pf_osfp_t)-1)
+#define PF_OSFP_NOMATCH	((pf_osfp_t)-2)
+
+struct pf_osfp_entry {
+	SLIST_ENTRY(pf_osfp_entry) fp_entry;
+	pf_osfp_t		fp_os;
+	int			fp_enflags;
+#define PF_OSFP_EXPANDED	0x001		/* expanded entry */
+#define PF_OSFP_GENERIC		0x002		/* generic signature */
+#define PF_OSFP_NODETAIL	0x004		/* no p0f details */
+#define PF_OSFP_LEN	32
+	char			fp_class_nm[PF_OSFP_LEN];
+	char			fp_version_nm[PF_OSFP_LEN];
+	char			fp_subtype_nm[PF_OSFP_LEN];
+};
+#define PF_OSFP_ENTRY_EQ(a, b) \
+    ((a)->fp_os == (b)->fp_os && \
+    memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \
+    memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \
+    memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0)
+
+/* handle pf_osfp_t packing */
+#define _FP_RESERVED_BIT	1  /* For the special negative #defines */
+#define _FP_UNUSED_BITS		1
+#define _FP_CLASS_BITS		10 /* OS Class (Windows, Linux) */
+#define _FP_VERSION_BITS	10 /* OS version (95, 98, NT, 2.4.54, 3.2) */
+#define _FP_SUBTYPE_BITS	10 /* patch level (NT SP4, SP3, ECN patch) */
+#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \
+	(class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \
+	    ((1 << _FP_CLASS_BITS) - 1); \
+	(version) = ((osfp) >> _FP_SUBTYPE_BITS) & \
+	    ((1 << _FP_VERSION_BITS) - 1);\
+	(subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+#define PF_OSFP_PACK(osfp, class, version, subtype) do { \
+	(osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \
+	    + _FP_SUBTYPE_BITS); \
+	(osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \
+	    _FP_SUBTYPE_BITS; \
+	(osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+
+/* the fingerprint of an OSes TCP SYN packet */
+typedef u_int64_t	pf_tcpopts_t;
+struct pf_os_fingerprint {
+	SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */
+	pf_tcpopts_t		fp_tcpopts;	/* packed TCP options */
+	u_int16_t		fp_wsize;	/* TCP window size */
+	u_int16_t		fp_psize;	/* ip->ip_len */
+	u_int16_t		fp_mss;		/* TCP MSS */
+	u_int16_t		fp_flags;
+#define PF_OSFP_WSIZE_MOD	0x0001		/* Window modulus */
+#define PF_OSFP_WSIZE_DC	0x0002		/* Window don't care */
+#define PF_OSFP_WSIZE_MSS	0x0004		/* Window multiple of MSS */
+#define PF_OSFP_WSIZE_MTU	0x0008		/* Window multiple of MTU */
+#define PF_OSFP_PSIZE_MOD	0x0010		/* packet size modulus */
+#define PF_OSFP_PSIZE_DC	0x0020		/* packet size don't care */
+#define PF_OSFP_WSCALE		0x0040		/* TCP window scaling */
+#define PF_OSFP_WSCALE_MOD	0x0080		/* TCP window scale modulus */
+#define PF_OSFP_WSCALE_DC	0x0100		/* TCP window scale dont-care */
+#define PF_OSFP_MSS		0x0200		/* TCP MSS */
+#define PF_OSFP_MSS_MOD		0x0400		/* TCP MSS modulus */
+#define PF_OSFP_MSS_DC		0x0800		/* TCP MSS dont-care */
+#define PF_OSFP_DF		0x1000		/* IPv4 don't fragment bit */
+#define PF_OSFP_TS0		0x2000		/* Zero timestamp */
+#define PF_OSFP_INET6		0x4000		/* IPv6 */
+	u_int8_t		fp_optcnt;	/* TCP option count */
+	u_int8_t		fp_wscale;	/* TCP window scaling */
+	u_int8_t		fp_ttl;		/* IPv4 TTL */
+#define PF_OSFP_MAXTTL_OFFSET	40
+/* TCP options packing */
+#define PF_OSFP_TCPOPT_NOP	0x0		/* TCP NOP option */
+#define PF_OSFP_TCPOPT_WSCALE	0x1		/* TCP window scaling option */
+#define PF_OSFP_TCPOPT_MSS	0x2		/* TCP max segment size opt */
+#define PF_OSFP_TCPOPT_SACK	0x3		/* TCP SACK OK option */
+#define PF_OSFP_TCPOPT_TS	0x4		/* TCP timestamp option */
+#define PF_OSFP_TCPOPT_BITS	3		/* bits used by each option */
+#define PF_OSFP_MAX_OPTS \
+    (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \
+    / PF_OSFP_TCPOPT_BITS
+
+	SLIST_ENTRY(pf_os_fingerprint)	fp_next;
+};
+
+struct pf_osfp_ioctl {
+	struct pf_osfp_entry	fp_os;
+	pf_tcpopts_t		fp_tcpopts;	/* packed TCP options */
+	u_int16_t		fp_wsize;	/* TCP window size */
+	u_int16_t		fp_psize;	/* ip->ip_len */
+	u_int16_t		fp_mss;		/* TCP MSS */
+	u_int16_t		fp_flags;
+	u_int8_t		fp_optcnt;	/* TCP option count */
+	u_int8_t		fp_wscale;	/* TCP window scaling */
+	u_int8_t		fp_ttl;		/* IPv4 TTL */
+
+	int			fp_getnum;	/* DIOCOSFPGET number */
+};
+
+
+union pf_rule_ptr {
+	struct pf_rule		*ptr;
+	u_int32_t		 nr;
+};
+
+#define	PF_ANCHOR_NAME_SIZE	 64
+
+struct pf_rule {
+	struct pf_rule_addr	 src;
+	struct pf_rule_addr	 dst;
+#define PF_SKIP_IFP		0
+#define PF_SKIP_DIR		1
+#define PF_SKIP_AF		2
+#define PF_SKIP_PROTO		3
+#define PF_SKIP_SRC_ADDR	4
+#define PF_SKIP_SRC_PORT	5
+#define PF_SKIP_DST_ADDR	6
+#define PF_SKIP_DST_PORT	7
+#define PF_SKIP_COUNT		8
+	union pf_rule_ptr	 skip[PF_SKIP_COUNT];
+#define PF_RULE_LABEL_SIZE	 64
+	char			 label[PF_RULE_LABEL_SIZE];
+	char			 ifname[IFNAMSIZ];
+	char			 qname[PF_QNAME_SIZE];
+	char			 pqname[PF_QNAME_SIZE];
+#define	PF_TAG_NAME_SIZE	 64
+	char			 tagname[PF_TAG_NAME_SIZE];
+	char			 match_tagname[PF_TAG_NAME_SIZE];
+
+	char			 overload_tblname[PF_TABLE_NAME_SIZE];
+
+	TAILQ_ENTRY(pf_rule)	 entries;
+	struct pf_pool		 rpool;
+
+	u_int64_t		 evaluations;
+	u_int64_t		 packets[2];
+	u_int64_t		 bytes[2];
+
+	struct pfi_kif		*kif;
+	struct pf_anchor	*anchor;
+	struct pfr_ktable	*overload_tbl;
+
+	pf_osfp_t		 os_fingerprint;
+
+	int			 rtableid;
+	u_int32_t		 timeout[PFTM_MAX];
+	u_int32_t		 max_states;
+	u_int32_t		 max_src_nodes;
+	u_int32_t		 max_src_states;
+	u_int32_t		 max_src_conn;
+	struct {
+		u_int32_t		limit;
+		u_int32_t		seconds;
+	}			 max_src_conn_rate;
+	u_int32_t		 qid;
+	u_int32_t		 pqid;
+	u_int32_t		 rt_listid;
+	u_int32_t		 nr;
+	u_int32_t		 prob;
+	uid_t			 cuid;
+	pid_t			 cpid;
+
+	counter_u64_t		 states_cur;
+	counter_u64_t		 states_tot;
+	counter_u64_t		 src_nodes;
+
+	u_int16_t		 return_icmp;
+	u_int16_t		 return_icmp6;
+	u_int16_t		 max_mss;
+	u_int16_t		 tag;
+	u_int16_t		 match_tag;
+	u_int16_t		 spare2;			/* netgraph */
+
+	struct pf_rule_uid	 uid;
+	struct pf_rule_gid	 gid;
+
+	u_int32_t		 rule_flag;
+	u_int8_t		 action;
+	u_int8_t		 direction;
+	u_int8_t		 log;
+	u_int8_t		 logif;
+	u_int8_t		 quick;
+	u_int8_t		 ifnot;
+	u_int8_t		 match_tag_not;
+	u_int8_t		 natpass;
+
+#define PF_STATE_NORMAL		0x1
+#define PF_STATE_MODULATE	0x2
+#define PF_STATE_SYNPROXY	0x3
+	u_int8_t		 keep_state;
+	sa_family_t		 af;
+	u_int8_t		 proto;
+	u_int8_t		 type;
+	u_int8_t		 code;
+	u_int8_t		 flags;
+	u_int8_t		 flagset;
+	u_int8_t		 min_ttl;
+	u_int8_t		 allow_opts;
+	u_int8_t		 rt;
+	u_int8_t		 return_ttl;
+	u_int8_t		 tos;
+	u_int8_t		 set_tos;
+	u_int8_t		 anchor_relative;
+	u_int8_t		 anchor_wildcard;
+
+#define PF_FLUSH		0x01
+#define PF_FLUSH_GLOBAL		0x02
+	u_int8_t		 flush;
+
+	struct {
+		struct pf_addr		addr;
+		u_int16_t		port;
+	}			divert;
+
+	uint64_t		 u_states_cur;
+	uint64_t		 u_states_tot;
+	uint64_t		 u_src_nodes;
+};
+
+/* rule flags */
+#define	PFRULE_DROP		0x0000
+#define	PFRULE_RETURNRST	0x0001
+#define	PFRULE_FRAGMENT		0x0002
+#define	PFRULE_RETURNICMP	0x0004
+#define	PFRULE_RETURN		0x0008
+#define	PFRULE_NOSYNC		0x0010
+#define PFRULE_SRCTRACK		0x0020  /* track source states */
+#define PFRULE_RULESRCTRACK	0x0040  /* per rule */
+#define	PFRULE_REFS		0x0080	/* rule has references */
+
+/* scrub flags */
+#define	PFRULE_NODF		0x0100
+#define	PFRULE_FRAGCROP		0x0200	/* non-buffering frag cache */
+#define	PFRULE_FRAGDROP		0x0400	/* drop funny fragments */
+#define PFRULE_RANDOMID		0x0800
+#define PFRULE_REASSEMBLE_TCP	0x1000
+#define PFRULE_SET_TOS		0x2000
+
+/* rule flags again */
+#define PFRULE_IFBOUND		0x00010000	/* if-bound */
+#define PFRULE_STATESLOPPY	0x00020000	/* sloppy state tracking */
+
+#define PFSTATE_HIWAT		10000	/* default state table size */
+#define PFSTATE_ADAPT_START	6000	/* default adaptive timeout start */
+#define PFSTATE_ADAPT_END	12000	/* default adaptive timeout end */
+
+
/*
 * Rate-threshold bookkeeping (used for pf_src_node.conn_rate).
 * "limit" is scaled by PF_THRESHOLD_MULT; count/last track events
 * within the "seconds" window.
 */
struct pf_threshold {
	u_int32_t	limit;
#define	PF_THRESHOLD_MULT	1000
/*
 * Parenthesized so the macro groups correctly when composed into a
 * larger expression (e.g. 2 * PF_THRESHOLD_MAX).
 */
#define PF_THRESHOLD_MAX	(0xffffffff / PF_THRESHOLD_MULT)
	u_int32_t	seconds;
	u_int32_t	count;
	u_int32_t	last;
};
+
/*
 * Per-source-address tracking entry; rule points back at the rule that
 * created it, raddr is the associated redirect address.
 */
struct pf_src_node {
	LIST_ENTRY(pf_src_node) entry;
	struct pf_addr	 addr;		/* tracked source address */
	struct pf_addr	 raddr;
	union pf_rule_ptr rule;
	struct pfi_kif	*kif;
	u_int64_t	 bytes[2];	/* per-direction -- index order as elsewhere in pf */
	u_int64_t	 packets[2];
	u_int32_t	 states;	/* states referencing this node */
	u_int32_t	 conn;
	struct pf_threshold	conn_rate;
	u_int32_t	 creation;
	u_int32_t	 expire;
	sa_family_t	 af;
	u_int8_t	 ruletype;
};

#define PFSNODE_HIWAT		10000	/* default source node table size */
+
/* Per-peer TCP scrub state: timestamp modulation and PAWS bookkeeping. */
struct pf_state_scrub {
	struct timeval	pfss_last;	/* time received last packet	*/
	u_int32_t	pfss_tsecr;	/* last echoed timestamp	*/
	u_int32_t	pfss_tsval;	/* largest timestamp		*/
	u_int32_t	pfss_tsval0;	/* original timestamp		*/
	u_int16_t	pfss_flags;
#define PFSS_TIMESTAMP	0x0001		/* modulate timestamp		*/
#define PFSS_PAWS	0x0010		/* stricter PAWS checks		*/
#define PFSS_PAWS_IDLED	0x0020		/* was idle too long.  no PAWS	*/
#define PFSS_DATA_TS	0x0040		/* timestamp on data packets	*/
#define PFSS_DATA_NOTS	0x0080		/* no timestamp on data packets	*/
	u_int8_t	pfss_ttl;	/* stashed TTL			*/
	u_int8_t	pad;
	u_int32_t	pfss_ts_mod;	/* timestamp modulation		*/
};

/* Address/port pair; pad keeps the struct 4-byte aligned. */
struct pf_state_host {
	struct pf_addr	addr;
	u_int16_t	port;
	u_int16_t	pad;
};
+
/* One endpoint's view of a tracked connection (sequence window, scrub). */
struct pf_state_peer {
	struct pf_state_scrub	*scrub;	/* state is scrubbed		*/
	u_int32_t	seqlo;		/* Max sequence number sent	*/
	u_int32_t	seqhi;		/* Max the other end ACKd + win	*/
	u_int32_t	seqdiff;	/* Sequence number modulator	*/
	u_int16_t	max_win;	/* largest window (pre scaling)	*/
	u_int16_t	mss;		/* Maximum segment size option	*/
	u_int8_t	state;		/* active state level		*/
	u_int8_t	wscale;		/* window scaling factor	*/
	u_int8_t	tcp_est;	/* Did we reach TCPS_ESTABLISHED */
	u_int8_t	pad[1];
};
+
/*
 * Keep synced with struct pf_state_key: the cmp variant is the lookup
 * prefix only (no list linkage), compared bytewise in the key hash.
 */
struct pf_state_key_cmp {
	struct pf_addr	 addr[2];
	u_int16_t	 port[2];
	sa_family_t	 af;
	u_int8_t	 proto;
	u_int8_t	 pad[2];
};

struct pf_state_key {
	struct pf_addr	 addr[2];
	u_int16_t	 port[2];
	sa_family_t	 af;
	u_int8_t	 proto;
	u_int8_t	 pad[2];

	LIST_ENTRY(pf_state_key) entry;
	TAILQ_HEAD(, pf_state)	 states[2];	/* states using this key */
};
+
/* Keep synced with struct pf_state: the cmp variant is the id-lookup prefix. */
struct pf_state_cmp {
	u_int64_t		 id;
	u_int32_t		 creatorid;
	u_int8_t		 direction;
	u_int8_t		 pad[3];
};

/*
 * Main state-table entry.  The leading four members must stay laid out
 * exactly as struct pf_state_cmp above.
 */
struct pf_state {
	u_int64_t		 id;
	u_int32_t		 creatorid;
	u_int8_t		 direction;
	u_int8_t		 pad[3];

	u_int			 refs;		/* see pf_ref/release_state() */
	TAILQ_ENTRY(pf_state)	 sync_list;
	TAILQ_ENTRY(pf_state)	 key_list[2];
	LIST_ENTRY(pf_state)	 entry;
	struct pf_state_peer	 src;
	struct pf_state_peer	 dst;
	union pf_rule_ptr	 rule;
	union pf_rule_ptr	 anchor;
	union pf_rule_ptr	 nat_rule;
	struct pf_addr		 rt_addr;
	struct pf_state_key	*key[2];	/* addresses stack and wire  */
	struct pfi_kif		*kif;
	struct pfi_kif		*rt_kif;
	struct pf_src_node	*src_node;
	struct pf_src_node	*nat_src_node;
	u_int64_t		 packets[2];
	u_int64_t		 bytes[2];
	u_int32_t		 creation;
	u_int32_t	 	 expire;
	u_int32_t		 pfsync_time;
	u_int16_t		 tag;
	u_int8_t		 log;
	u_int8_t		 state_flags;
#define	PFSTATE_ALLOWOPTS	0x01
#define	PFSTATE_SLOPPY		0x02
/*  was	PFSTATE_PFLOW		0x04 */
#define	PFSTATE_NOSYNC		0x08
#define	PFSTATE_ACK		0x10
	u_int8_t		 timeout;
	u_int8_t		 sync_state; /* PFSYNC_S_x */

	/* XXX */
	u_int8_t		 sync_updates;
	u_int8_t		_tail[3];
};
+
+/*
+ * Unified state structures for pulling states out of the kernel
+ * used by pfsync(4) and the pf(4) ioctl.
+ */
/*
 * Wire/export representations below are __packed and carried in network
 * byte order (see pf_state_peer_hton/ntoh); do not reorder fields.
 */
struct pfsync_state_scrub {
	u_int16_t	pfss_flags;
	u_int8_t	pfss_ttl;	/* stashed TTL		*/
#define PFSYNC_SCRUB_FLAG_VALID		0x01
	u_int8_t	scrub_flag;
	u_int32_t	pfss_ts_mod;	/* timestamp modulation	*/
} __packed;

struct pfsync_state_peer {
	struct pfsync_state_scrub scrub;	/* state is scrubbed	*/
	u_int32_t	seqlo;		/* Max sequence number sent	*/
	u_int32_t	seqhi;		/* Max the other end ACKd + win	*/
	u_int32_t	seqdiff;	/* Sequence number modulator	*/
	u_int16_t	max_win;	/* largest window (pre scaling)	*/
	u_int16_t	mss;		/* Maximum segment size option	*/
	u_int8_t	state;		/* active state level		*/
	u_int8_t	wscale;		/* window scaling factor	*/
	u_int8_t	pad[6];
} __packed;

struct pfsync_state_key {
	struct pf_addr	 addr[2];
	u_int16_t	 port[2];
};

/* Unified export format used by pfsync(4) and the pf(4) ioctl interface. */
struct pfsync_state {
	u_int64_t	 id;
	char		 ifname[IFNAMSIZ];
	struct pfsync_state_key	key[2];
	struct pfsync_state_peer src;
	struct pfsync_state_peer dst;
	struct pf_addr	 rt_addr;
	u_int32_t	 rule;
	u_int32_t	 anchor;
	u_int32_t	 nat_rule;
	u_int32_t	 creation;
	u_int32_t	 expire;
	u_int32_t	 packets[2][2];	/* split in 32-bit halves for the wire */
	u_int32_t	 bytes[2][2];
	u_int32_t	 creatorid;
	sa_family_t	 af;
	u_int8_t	 proto;
	u_int8_t	 direction;
	u_int8_t	 __spare[2];
	u_int8_t	 log;
	u_int8_t	 state_flags;
	u_int8_t	 timeout;
	u_int8_t	 sync_flags;
	u_int8_t	 updates;
} __packed;
+
#ifdef _KERNEL
/*
 * pfsync: function-pointer hooks filled in when the pfsync(4) module
 * loads; NULL pointers mean pfsync is not active.
 */
typedef int		pfsync_state_import_t(struct pfsync_state *, u_int8_t);
typedef	void		pfsync_insert_state_t(struct pf_state *);
typedef	void		pfsync_update_state_t(struct pf_state *);
typedef	void		pfsync_delete_state_t(struct pf_state *);
typedef void		pfsync_clear_states_t(u_int32_t, const char *);
typedef int		pfsync_defer_t(struct pf_state *, struct mbuf *);

extern pfsync_state_import_t	*pfsync_state_import_ptr;
extern pfsync_insert_state_t	*pfsync_insert_state_ptr;
extern pfsync_update_state_t	*pfsync_update_state_ptr;
extern pfsync_delete_state_t	*pfsync_delete_state_ptr;
extern pfsync_clear_states_t	*pfsync_clear_states_ptr;
extern pfsync_defer_t		*pfsync_defer_ptr;

void			pfsync_state_export(struct pfsync_state *,
			    struct pf_state *);

/* pflog: same hook mechanism for the pflog(4) module */
struct pf_ruleset;
struct pf_pdesc;
typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
    u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
    struct pf_ruleset *, struct pf_pdesc *, int);
extern pflog_packet_t		*pflog_packet_ptr;

#define	V_pf_end_threads	VNET(pf_end_threads)
#endif /* _KERNEL */
+
#define	PFSYNC_FLAG_SRCNODE	0x04
#define	PFSYNC_FLAG_NATSRCNODE	0x08

/* for copies to/from network byte order */
/* ioctl interface also uses network byte order */
/*
 * NOTE(review): s and d are evaluated multiple times in both macros;
 * only pass side-effect-free lvalue expressions.
 */
#define pf_state_peer_hton(s,d) do {		\
	(d)->seqlo = htonl((s)->seqlo);		\
	(d)->seqhi = htonl((s)->seqhi);		\
	(d)->seqdiff = htonl((s)->seqdiff);	\
	(d)->max_win = htons((s)->max_win);	\
	(d)->mss = htons((s)->mss);		\
	(d)->state = (s)->state;		\
	(d)->wscale = (s)->wscale;		\
	if ((s)->scrub) {						\
		(d)->scrub.pfss_flags = 				\
		    htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP);	\
		(d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl;		\
		(d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
		(d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;	\
	}								\
} while (0)

#define pf_state_peer_ntoh(s,d) do {		\
	(d)->seqlo = ntohl((s)->seqlo);		\
	(d)->seqhi = ntohl((s)->seqhi);		\
	(d)->seqdiff = ntohl((s)->seqdiff);	\
	(d)->max_win = ntohs((s)->max_win);	\
	(d)->mss = ntohs((s)->mss);		\
	(d)->state = (s)->state;		\
	(d)->wscale = (s)->wscale;		\
	if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && 	\
	    (d)->scrub != NULL) {					\
		(d)->scrub->pfss_flags =				\
		    ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP;	\
		(d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl;		\
		(d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
	}								\
} while (0)
+
/*
 * Split a 64-bit host-order counter into two network-order 32-bit words
 * and back.  Arguments are parenthesized so expression arguments group
 * correctly; s/d may still be evaluated more than once, so pass only
 * side-effect-free expressions.
 */
#define pf_state_counter_hton(s,d) do {				\
	(d)[0] = htonl(((s)>>32)&0xffffffff);			\
	(d)[1] = htonl((s)&0xffffffff);				\
} while (0)

#define pf_state_counter_from_pfsync(s)				\
	(((u_int64_t)((s)[0])<<32) | (u_int64_t)((s)[1]))

#define pf_state_counter_ntoh(s,d) do {				\
	(d) = ntohl((s)[0]);					\
	(d) = (d)<<32;						\
	(d) += ntohl((s)[1]);					\
} while (0)
+
TAILQ_HEAD(pf_rulequeue, pf_rule);

struct pf_anchor;

/*
 * A ruleset: per rule-type (PF_RULESET_*) an active and an inactive
 * (transaction staging) rule queue with its commit ticket.
 */
struct pf_ruleset {
	struct {
		struct pf_rulequeue	 queues[2];
		struct {
			struct pf_rulequeue	*ptr;
			struct pf_rule		**ptr_array;
			u_int32_t		 rcount;	/* rule count */
			u_int32_t		 ticket;
			int			 open;		/* transaction open */
		}			 active, inactive;
	}			 rules[PF_RULESET_MAX];
	struct pf_anchor	*anchor;
	u_int32_t		 tticket;
	int			 tables;
	int			 topen;
};

/* Anchors form a tree (by path) and a global RB index by full path. */
RB_HEAD(pf_anchor_global, pf_anchor);
RB_HEAD(pf_anchor_node, pf_anchor);
struct pf_anchor {
	RB_ENTRY(pf_anchor)	 entry_global;
	RB_ENTRY(pf_anchor)	 entry_node;
	struct pf_anchor	*parent;
	struct pf_anchor_node	 children;
	char			 name[PF_ANCHOR_NAME_SIZE];
	char			 path[MAXPATHLEN];
	struct pf_ruleset	 ruleset;
	int			 refcnt;	/* anchor rules */
	int			 match;	/* XXX: used for pfctl black magic */
};
RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
+
#define PF_RESERVED_ANCHOR	"_pf"

/* table flags (pfrt_flags); USRMASK = user-settable, SETMASK = kernel-managed */
#define PFR_TFLAG_PERSIST	0x00000001
#define PFR_TFLAG_CONST		0x00000002
#define PFR_TFLAG_ACTIVE	0x00000004
#define PFR_TFLAG_INACTIVE	0x00000008
#define PFR_TFLAG_REFERENCED	0x00000010
#define PFR_TFLAG_REFDANCHOR	0x00000020
#define PFR_TFLAG_COUNTERS	0x00000040
/* Adjust masks below when adding flags. */
#define PFR_TFLAG_USRMASK	(PFR_TFLAG_PERSIST	| \
				 PFR_TFLAG_CONST	| \
				 PFR_TFLAG_COUNTERS)
#define PFR_TFLAG_SETMASK	(PFR_TFLAG_ACTIVE	| \
				 PFR_TFLAG_INACTIVE	| \
				 PFR_TFLAG_REFERENCED	| \
				 PFR_TFLAG_REFDANCHOR)
#define PFR_TFLAG_ALLMASK	(PFR_TFLAG_PERSIST	| \
				 PFR_TFLAG_CONST	| \
				 PFR_TFLAG_ACTIVE	| \
				 PFR_TFLAG_INACTIVE	| \
				 PFR_TFLAG_REFERENCED	| \
				 PFR_TFLAG_REFDANCHOR	| \
				 PFR_TFLAG_COUNTERS)

struct pf_anchor_stackframe;

/* User-visible table identity (anchor path + name + flags). */
struct pfr_table {
	char			 pfrt_anchor[MAXPATHLEN];
	char			 pfrt_name[PF_TABLE_NAME_SIZE];
	u_int32_t		 pfrt_flags;
	u_int8_t		 pfrt_fback;	/* per-entry feedback, PFR_FB_* */
};

enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED,
	PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE,
	PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX };
+
/* One table address entry as seen by userland (v4 or v6, with prefix). */
struct pfr_addr {
	union {
		struct in_addr	 _pfra_ip4addr;
		struct in6_addr	 _pfra_ip6addr;
	}		 pfra_u;
	u_int8_t	 pfra_af;
	u_int8_t	 pfra_net;	/* prefix length */
	u_int8_t	 pfra_not;	/* negated entry */
	u_int8_t	 pfra_fback;	/* feedback, PFR_FB_* */
};
#define	pfra_ip4addr	pfra_u._pfra_ip4addr
#define	pfra_ip6addr	pfra_u._pfra_ip6addr

/* counter dimensions: direction x operation */
enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX };
enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX };
#define PFR_OP_XPASS	PFR_OP_ADDR_MAX

/* per-address statistics exported to userland */
struct pfr_astats {
	struct pfr_addr	 pfras_a;
	u_int64_t	 pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
	u_int64_t	 pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
	long		 pfras_tzero;	/* time counters were last zeroed */
};
+
enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX };

/* per-table statistics exported to userland */
struct pfr_tstats {
	struct pfr_table pfrts_t;
	u_int64_t	 pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
	u_int64_t	 pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
	u_int64_t	 pfrts_match;
	u_int64_t	 pfrts_nomatch;
	long		 pfrts_tzero;
	int		 pfrts_cnt;	/* address count */
	int		 pfrts_refcnt[PFR_REFCNT_MAX];
};
#define	pfrts_name	pfrts_t.pfrt_name
#define pfrts_flags	pfrts_t.pfrt_flags

#ifndef _SOCKADDR_UNION_DEFINED
#define	_SOCKADDR_UNION_DEFINED
/* Generic sockaddr large enough for v4 and v6 addresses. */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
};
#endif /* _SOCKADDR_UNION_DEFINED */

/* kernel-side counters for one table entry */
struct pfr_kcounters {
	u_int64_t		 pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
	u_int64_t		 pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
};
+
SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
/* Kernel radix-tree node for one table address. */
struct pfr_kentry {
	struct radix_node	 pfrke_node[2];
	union sockaddr_union	 pfrke_sa;
	SLIST_ENTRY(pfr_kentry)	 pfrke_workq;
	struct pfr_kcounters	*pfrke_counters;	/* NULL unless COUNTERS set */
	long			 pfrke_tzero;
	u_int8_t		 pfrke_af;
	u_int8_t		 pfrke_net;
	u_int8_t		 pfrke_not;
	u_int8_t		 pfrke_mark;
};

SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
RB_HEAD(pfr_ktablehead, pfr_ktable);
/* Kernel-side table: stats header plus a v4 and a v6 radix head. */
struct pfr_ktable {
	struct pfr_tstats	 pfrkt_ts;
	RB_ENTRY(pfr_ktable)	 pfrkt_tree;
	SLIST_ENTRY(pfr_ktable)	 pfrkt_workq;
	struct radix_node_head	*pfrkt_ip4;
	struct radix_node_head	*pfrkt_ip6;
	struct pfr_ktable	*pfrkt_shadow;	/* staging copy for transactions */
	struct pfr_ktable	*pfrkt_root;
	struct pf_ruleset	*pfrkt_rs;
	long			 pfrkt_larg;
	int			 pfrkt_nflags;
};
/* shorthand accessors through the embedded pfr_tstats/pfr_table */
#define pfrkt_t		pfrkt_ts.pfrts_t
#define pfrkt_name	pfrkt_t.pfrt_name
#define pfrkt_anchor	pfrkt_t.pfrt_anchor
#define pfrkt_ruleset	pfrkt_t.pfrt_ruleset
#define pfrkt_flags	pfrkt_t.pfrt_flags
#define pfrkt_cnt	pfrkt_ts.pfrts_cnt
#define pfrkt_refcnt	pfrkt_ts.pfrts_refcnt
#define pfrkt_packets	pfrkt_ts.pfrts_packets
#define pfrkt_bytes	pfrkt_ts.pfrts_bytes
#define pfrkt_match	pfrkt_ts.pfrts_match
#define pfrkt_nomatch	pfrkt_ts.pfrts_nomatch
#define pfrkt_tzero	pfrkt_ts.pfrts_tzero
+
/* keep synced with pfi_kif, used in RB_FIND */
struct pfi_kif_cmp {
	char				 pfik_name[IFNAMSIZ];
};

/* Kernel interface descriptor: one per ifnet or interface group. */
struct pfi_kif {
	char				 pfik_name[IFNAMSIZ];
	union {
		RB_ENTRY(pfi_kif)	 _pfik_tree;
		LIST_ENTRY(pfi_kif)	 _pfik_list;
	} _pfik_glue;
#define	pfik_tree	_pfik_glue._pfik_tree
#define	pfik_list	_pfik_glue._pfik_list
	u_int64_t			 pfik_packets[2][2][2];
	u_int64_t			 pfik_bytes[2][2][2];
	u_int32_t			 pfik_tzero;
	u_int				 pfik_flags;	/* PFI_IFLAG_* */
	struct ifnet			*pfik_ifp;	/* NULL for groups */
	struct ifg_group		*pfik_group;
	u_int				 pfik_rulerefs;
	TAILQ_HEAD(, pfi_dynaddr)	 pfik_dynaddrs;
};

#define	PFI_IFLAG_REFS		0x0001	/* has state references */
#define PFI_IFLAG_SKIP		0x0100	/* skip filtering on interface */
+
/*
 * Packet descriptor: per-packet parse results handed between the
 * test/normalize/translate stages; pointers reference the mbuf data.
 */
struct pf_pdesc {
	struct {
		int	 done;		/* socket lookup already performed */
		uid_t	 uid;
		gid_t	 gid;
	}		 lookup;
	u_int64_t	 tot_len;	/* Make Mickey money */
	union {
		struct tcphdr		*tcp;
		struct udphdr		*udp;
		struct icmp		*icmp;
#ifdef INET6
		struct icmp6_hdr	*icmp6;
#endif /* INET6 */
		void			*any;
	} hdr;

	struct pf_rule	*nat_rule;	/* nat/rdr rule applied to packet */
	struct pf_addr	*src;		/* src address */
	struct pf_addr	*dst;		/* dst address */
	u_int16_t *sport;
	u_int16_t *dport;
	struct pf_mtag	*pf_mtag;

	u_int32_t	 p_len;		/* total length of payload */

	u_int16_t	*ip_sum;
	u_int16_t	*proto_sum;
	u_int16_t	 flags;		/* Let SCRUB trigger behavior in
					 * state code. Easier than tags */
#define PFDESC_TCP_NORM	0x0001		/* TCP shall be statefully scrubbed */
#define PFDESC_IP_REAS	0x0002		/* IP frags would've been reassembled */
	sa_family_t	 af;
	u_int8_t	 proto;
	u_int8_t	 tos;
	u_int8_t	 dir;		/* direction */
	u_int8_t	 sidx;		/* key index for source */
	u_int8_t	 didx;		/* key index for destination */
};
+
/* flags for RDR options */
#define PF_DPORT_RANGE	0x01		/* Dest port uses range */
#define PF_RPORT_RANGE	0x02		/* RDR'ed port uses range */

/* UDP state enumeration (pf_state_peer.state for UDP) */
#define PFUDPS_NO_TRAFFIC	0
#define PFUDPS_SINGLE		1
#define PFUDPS_MULTIPLE		2

#define PFUDPS_NSTATES		3	/* number of state levels */

/* printable names, indexed by the PFUDPS_* values above */
#define PFUDPS_NAMES { \
	"NO_TRAFFIC", \
	"SINGLE", \
	"MULTIPLE", \
	NULL \
}

/* Other protocol state enumeration */
#define PFOTHERS_NO_TRAFFIC	0
#define PFOTHERS_SINGLE		1
#define PFOTHERS_MULTIPLE	2

#define PFOTHERS_NSTATES	3	/* number of state levels */

/* printable names, indexed by the PFOTHERS_* values above */
#define PFOTHERS_NAMES { \
	"NO_TRAFFIC", \
	"SINGLE", \
	"MULTIPLE", \
	NULL \
}
+
/* Store action x through pointer a if a is non-NULL. */
#define ACTION_SET(a, x) \
	do { \
		if ((a) != NULL) \
			*(a) = (x); \
	} while (0)

/*
 * Like ACTION_SET() but also bumps the matching reason counter.
 * The reason argument is parenthesized so expression arguments keep
 * their intended precedence; note x may be evaluated up to three times.
 */
#define REASON_SET(a, x) \
	do { \
		if ((a) != NULL) \
			*(a) = (x); \
		if ((x) < PFRES_MAX) \
			counter_u64_add(V_pf_status.counters[(x)], 1); \
	} while (0)
+
/* Kernel-private pf status: per-CPU counter(9) handles plus globals. */
struct pf_kstatus {
	counter_u64_t	counters[PFRES_MAX]; /* reason for passing/dropping */
	counter_u64_t	lcounters[LCNT_MAX]; /* limit counters */
	counter_u64_t	fcounters[FCNT_MAX]; /* state operation counters */
	counter_u64_t	scounters[SCNT_MAX]; /* src_node operation counters */
	uint32_t	states;
	uint32_t	src_nodes;
	uint32_t	running;
	uint32_t	since;
	uint32_t	debug;
	uint32_t	hostid;
	char		ifname[IFNAMSIZ];	/* status/loginterface name */
	uint8_t		pf_chksum[PF_MD5_DIGEST_LENGTH];
};

/* divert-to target (address family selected by the packet's af) */
struct pf_divert {
	union {
		struct in_addr	ipv4;
		struct in6_addr	ipv6;
	}		addr;
	u_int16_t	port;
};

#define PFFRAG_FRENT_HIWAT	5000	/* Number of fragment entries */
#define PFR_KENTRY_HIWAT	200000	/* Number of table entries */
+
/*
 * ioctl parameter structures
 */

/* DIOCBEGINADDRS/DIOCADDADDR/...: pool address list manipulation */
struct pfioc_pooladdr {
	u_int32_t		 action;
	u_int32_t		 ticket;
	u_int32_t		 nr;
	u_int32_t		 r_num;
	u_int8_t		 r_action;
	u_int8_t		 r_last;
	u_int8_t		 af;
	char			 anchor[MAXPATHLEN];
	struct pf_pooladdr	 addr;
};

/* DIOCADDRULE/DIOCGETRULE/...: one rule plus its anchor context */
struct pfioc_rule {
	u_int32_t	 action;
	u_int32_t	 ticket;
	u_int32_t	 pool_ticket;
	u_int32_t	 nr;
	char		 anchor[MAXPATHLEN];
	char		 anchor_call[MAXPATHLEN];
	struct pf_rule	 rule;
};

/* DIOCNATLOOK: look up the translated peer of a NATed connection */
struct pfioc_natlook {
	struct pf_addr	 saddr;
	struct pf_addr	 daddr;
	struct pf_addr	 rsaddr;	/* returned (translated) source */
	struct pf_addr	 rdaddr;	/* returned (translated) destination */
	u_int16_t	 sport;
	u_int16_t	 dport;
	u_int16_t	 rsport;
	u_int16_t	 rdport;
	sa_family_t	 af;
	u_int8_t	 proto;
	u_int8_t	 direction;
};

struct pfioc_state {
	struct pfsync_state	state;
};

/* DIOCKILLSRCNODES: criteria for removing source nodes */
struct pfioc_src_node_kill {
	sa_family_t psnk_af;
	struct pf_rule_addr psnk_src;
	struct pf_rule_addr psnk_dst;
	u_int		    psnk_killed;	/* out: number removed */
};

/* DIOCCLRSTATES/DIOCKILLSTATES: criteria for removing states */
struct pfioc_state_kill {
	struct pf_state_cmp	psk_pfcmp;
	sa_family_t		psk_af;
	int			psk_proto;
	struct pf_rule_addr	psk_src;
	struct pf_rule_addr	psk_dst;
	char			psk_ifname[IFNAMSIZ];
	char			psk_label[PF_RULE_LABEL_SIZE];
	u_int			psk_killed;	/* out: number removed */
};

/* DIOCGETSTATES: caller-supplied buffer, ps_len updated by the kernel */
struct pfioc_states {
	int	ps_len;
	union {
		caddr_t			 psu_buf;
		struct pfsync_state	*psu_states;
	} ps_u;
#define ps_buf		ps_u.psu_buf
#define ps_states	ps_u.psu_states
};

/* DIOCGETSRCNODES: same buffer convention as pfioc_states */
struct pfioc_src_nodes {
	int	psn_len;
	union {
		caddr_t		 psu_buf;
		struct pf_src_node	*psu_src_nodes;
	} psn_u;
#define psn_buf		psn_u.psu_buf
#define psn_src_nodes	psn_u.psu_src_nodes
};

struct pfioc_if {
	char		 ifname[IFNAMSIZ];
};

/* DIOCSETTIMEOUT/DIOCGETTIMEOUT */
struct pfioc_tm {
	int		 timeout;
	int		 seconds;
};
+
/* DIOCSETLIMIT/DIOCGETLIMIT: memory pool limits (PF_LIMIT_*) */
struct pfioc_limit {
	int		 index;
	unsigned	 limit;
};

struct pfioc_altq {
	u_int32_t	 action;
	u_int32_t	 ticket;
	u_int32_t	 nr;
	struct pf_altq	 altq;
};

struct pfioc_qstats {
	u_int32_t	 ticket;
	u_int32_t	 nr;
	void		*buf;
	int		 nbytes;
	u_int8_t	 scheduler;
};

struct pfioc_ruleset {
	u_int32_t	 nr;
	char		 path[MAXPATHLEN];
	char		 name[PF_ANCHOR_NAME_SIZE];
};

/* extra pseudo-ruleset numbers used only by the transaction interface */
#define PF_RULESET_ALTQ		(PF_RULESET_MAX)
#define PF_RULESET_TABLE	(PF_RULESET_MAX+1)
/* DIOCXBEGIN/DIOCXCOMMIT/DIOCXROLLBACK: batched ruleset transactions */
struct pfioc_trans {
	int		 size;	/* number of elements */
	int		 esize; /* size of each element in bytes */
	struct pfioc_trans_e {
		int		rs_num;
		char		anchor[MAXPATHLEN];
		u_int32_t	ticket;
	}		*array;
};

/* flags for the DIOCR* table ioctls (pfrio_flags) */
#define PFR_FLAG_ATOMIC		0x00000001	/* unused */
#define PFR_FLAG_DUMMY		0x00000002
#define PFR_FLAG_FEEDBACK	0x00000004
#define PFR_FLAG_CLSTATS	0x00000008
#define PFR_FLAG_ADDRSTOO	0x00000010
#define PFR_FLAG_REPLACE	0x00000020
#define PFR_FLAG_ALLRSETS	0x00000040
#define PFR_FLAG_ALLMASK	0x0000007F
#ifdef _KERNEL
#define PFR_FLAG_USERIOCTL	0x10000000
#endif

/* DIOCR*: generic table ioctl argument; field meaning varies per command */
struct pfioc_table {
	struct pfr_table	 pfrio_table;
	void			*pfrio_buffer;
	int			 pfrio_esize;
	int			 pfrio_size;
	int			 pfrio_size2;
	int			 pfrio_nadd;
	int			 pfrio_ndel;
	int			 pfrio_nchange;
	int			 pfrio_flags;
	u_int32_t		 pfrio_ticket;
};
/* per-command aliases for the overloaded count fields above */
#define	pfrio_exists	pfrio_nadd
#define	pfrio_nzero	pfrio_nadd
#define	pfrio_nmatch	pfrio_nadd
#define pfrio_naddr	pfrio_size2
#define pfrio_setflag	pfrio_size2
#define pfrio_clrflag	pfrio_nadd

/* DIOCIGETIFACES/DIOCSETIFFLAG/DIOCCLRIFFLAG */
struct pfioc_iface {
	char	 pfiio_name[IFNAMSIZ];
	void	*pfiio_buffer;
	int	 pfiio_esize;
	int	 pfiio_size;
	int	 pfiio_nzero;
	int	 pfiio_flags;
};
+

/*
 * ioctl operations
 *
 * All commands use the 'D' ioctl group; gaps marked "XXX cut" are
 * retired command numbers that must not be reused.
 */

#define DIOCSTART	_IO  ('D',  1)
#define DIOCSTOP	_IO  ('D',  2)
#define DIOCADDRULE	_IOWR('D',  4, struct pfioc_rule)
#define DIOCGETRULES	_IOWR('D',  6, struct pfioc_rule)
#define DIOCGETRULE	_IOWR('D',  7, struct pfioc_rule)
/* XXX cut 8 - 17 */
#define DIOCCLRSTATES	_IOWR('D', 18, struct pfioc_state_kill)
#define DIOCGETSTATE	_IOWR('D', 19, struct pfioc_state)
#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if)
#define DIOCGETSTATUS	_IOWR('D', 21, struct pf_status)
#define DIOCCLRSTATUS	_IO  ('D', 22)
#define DIOCNATLOOK	_IOWR('D', 23, struct pfioc_natlook)
#define DIOCSETDEBUG	_IOWR('D', 24, u_int32_t)
#define DIOCGETSTATES	_IOWR('D', 25, struct pfioc_states)
#define DIOCCHANGERULE	_IOWR('D', 26, struct pfioc_rule)
/* XXX cut 26 - 28 */
#define DIOCSETTIMEOUT	_IOWR('D', 29, struct pfioc_tm)
#define DIOCGETTIMEOUT	_IOWR('D', 30, struct pfioc_tm)
#define DIOCADDSTATE	_IOWR('D', 37, struct pfioc_state)
#define DIOCCLRRULECTRS	_IO  ('D', 38)
#define DIOCGETLIMIT	_IOWR('D', 39, struct pfioc_limit)
#define DIOCSETLIMIT	_IOWR('D', 40, struct pfioc_limit)
#define DIOCKILLSTATES	_IOWR('D', 41, struct pfioc_state_kill)
#define DIOCSTARTALTQ	_IO  ('D', 42)
#define DIOCSTOPALTQ	_IO  ('D', 43)
#define DIOCADDALTQ	_IOWR('D', 45, struct pfioc_altq)
#define DIOCGETALTQS	_IOWR('D', 47, struct pfioc_altq)
#define DIOCGETALTQ	_IOWR('D', 48, struct pfioc_altq)
#define DIOCCHANGEALTQ	_IOWR('D', 49, struct pfioc_altq)
#define DIOCGETQSTATS	_IOWR('D', 50, struct pfioc_qstats)
#define DIOCBEGINADDRS	_IOWR('D', 51, struct pfioc_pooladdr)
#define DIOCADDADDR	_IOWR('D', 52, struct pfioc_pooladdr)
#define DIOCGETADDRS	_IOWR('D', 53, struct pfioc_pooladdr)
#define DIOCGETADDR	_IOWR('D', 54, struct pfioc_pooladdr)
#define DIOCCHANGEADDR	_IOWR('D', 55, struct pfioc_pooladdr)
/* XXX cut 55 - 57 */
#define	DIOCGETRULESETS	_IOWR('D', 58, struct pfioc_ruleset)
#define	DIOCGETRULESET	_IOWR('D', 59, struct pfioc_ruleset)
#define	DIOCRCLRTABLES	_IOWR('D', 60, struct pfioc_table)
#define	DIOCRADDTABLES	_IOWR('D', 61, struct pfioc_table)
#define	DIOCRDELTABLES	_IOWR('D', 62, struct pfioc_table)
#define	DIOCRGETTABLES	_IOWR('D', 63, struct pfioc_table)
#define	DIOCRGETTSTATS	_IOWR('D', 64, struct pfioc_table)
#define DIOCRCLRTSTATS	_IOWR('D', 65, struct pfioc_table)
#define	DIOCRCLRADDRS	_IOWR('D', 66, struct pfioc_table)
#define	DIOCRADDADDRS	_IOWR('D', 67, struct pfioc_table)
#define	DIOCRDELADDRS	_IOWR('D', 68, struct pfioc_table)
#define	DIOCRSETADDRS	_IOWR('D', 69, struct pfioc_table)
#define	DIOCRGETADDRS	_IOWR('D', 70, struct pfioc_table)
#define	DIOCRGETASTATS	_IOWR('D', 71, struct pfioc_table)
#define	DIOCRCLRASTATS	_IOWR('D', 72, struct pfioc_table)
#define	DIOCRTSTADDRS	_IOWR('D', 73, struct pfioc_table)
#define	DIOCRSETTFLAGS	_IOWR('D', 74, struct pfioc_table)
#define	DIOCRINADEFINE	_IOWR('D', 77, struct pfioc_table)
#define	DIOCOSFPFLUSH	_IO('D', 78)
#define	DIOCOSFPADD	_IOWR('D', 79, struct pf_osfp_ioctl)
#define	DIOCOSFPGET	_IOWR('D', 80, struct pf_osfp_ioctl)
#define	DIOCXBEGIN	_IOWR('D', 81, struct pfioc_trans)
#define	DIOCXCOMMIT	_IOWR('D', 82, struct pfioc_trans)
#define	DIOCXROLLBACK	_IOWR('D', 83, struct pfioc_trans)
#define	DIOCGETSRCNODES	_IOWR('D', 84, struct pfioc_src_nodes)
#define	DIOCCLRSRCNODES	_IO('D', 85)
#define	DIOCSETHOSTID	_IOWR('D', 86, u_int32_t)
#define	DIOCIGETIFACES	_IOWR('D', 87, struct pfioc_iface)
#define	DIOCSETIFFLAG	_IOWR('D', 89, struct pfioc_iface)
#define	DIOCCLRIFFLAG	_IOWR('D', 90, struct pfioc_iface)
#define	DIOCKILLSRCNODES	_IOWR('D', 91, struct pfioc_src_node_kill)
/* DIOCGIFSPEED: query an interface's link speed (bits/s in baudrate) */
struct pf_ifspeed {
	char			ifname[IFNAMSIZ];
	u_int32_t		baudrate;
};
#define	DIOCGIFSPEED	_IOWR('D', 92, struct pf_ifspeed)
+
#ifdef _KERNEL
LIST_HEAD(pf_src_node_list, pf_src_node);
/* hash buckets: each carries its own mutex for fine-grained locking */
struct pf_srchash {
	struct pf_src_node_list		nodes;
	struct mtx			lock;
};

struct pf_keyhash {
	LIST_HEAD(, pf_state_key)	keys;
	struct mtx			lock;
};

struct pf_idhash {
	LIST_HEAD(, pf_state)		states;
	struct mtx			lock;
};

extern u_long		pf_hashmask;
extern u_long		pf_srchashmask;
#define	PF_HASHSIZ	(32768)
#define	PF_SRCHASHSIZ	(PF_HASHSIZ/4)
VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
VNET_DECLARE(struct pf_idhash *, pf_idhash);
#define V_pf_keyhash	VNET(pf_keyhash)
#define	V_pf_idhash	VNET(pf_idhash)
VNET_DECLARE(struct pf_srchash *, pf_srchash);
#define	V_pf_srchash	VNET(pf_srchash)

/* map a state id to its id-hash bucket index */
#define PF_IDHASH(s)	(be64toh((s)->id) % (pf_hashmask + 1))

VNET_DECLARE(void *, pf_swi_cookie);
#define V_pf_swi_cookie	VNET(pf_swi_cookie)
+
+TAILQ_HEAD(pf_altqqueue, pf_altq);
+VNET_DECLARE(struct pf_altqqueue,	 pf_altqs[2]);
+#define	V_pf_altqs			 VNET(pf_altqs)
+VNET_DECLARE(struct pf_palist,		 pf_pabuf);
+#define	V_pf_pabuf			 VNET(pf_pabuf)
+
+VNET_DECLARE(u_int32_t,			 ticket_altqs_active);
+#define	V_ticket_altqs_active		 VNET(ticket_altqs_active)
+VNET_DECLARE(u_int32_t,			 ticket_altqs_inactive);
+#define	V_ticket_altqs_inactive		 VNET(ticket_altqs_inactive)
+VNET_DECLARE(int,			 altqs_inactive_open);
+#define	V_altqs_inactive_open		 VNET(altqs_inactive_open)
+VNET_DECLARE(u_int32_t,			 ticket_pabuf);
+#define	V_ticket_pabuf			 VNET(ticket_pabuf)
+VNET_DECLARE(struct pf_altqqueue *,	 pf_altqs_active);
+#define	V_pf_altqs_active		 VNET(pf_altqs_active)
+VNET_DECLARE(struct pf_altqqueue *,	 pf_altqs_inactive);
+#define	V_pf_altqs_inactive		 VNET(pf_altqs_inactive)
+
+VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules);
+#define	V_pf_unlinked_rules	VNET(pf_unlinked_rules)
+
+void				 pf_initialize(void);
+void				 pf_mtag_initialize(void);
+void				 pf_mtag_cleanup(void);
+void				 pf_cleanup(void);
+
+struct pf_mtag			*pf_get_mtag(struct mbuf *);
+
+extern void			 pf_calc_skip_steps(struct pf_rulequeue *);
+#ifdef ALTQ
+extern	void			 pf_altq_ifnet_event(struct ifnet *, int);
+#endif
+VNET_DECLARE(uma_zone_t,	 pf_state_z);
+#define	V_pf_state_z		 VNET(pf_state_z)
+VNET_DECLARE(uma_zone_t,	 pf_state_key_z);
+#define	V_pf_state_key_z	 VNET(pf_state_key_z)
+VNET_DECLARE(uma_zone_t,	 pf_state_scrub_z);
+#define	V_pf_state_scrub_z	 VNET(pf_state_scrub_z)
+
+extern void			 pf_purge_thread(void *);
+extern void			 pf_intr(void *);
+extern void			 pf_purge_expired_src_nodes(void);
+
+extern int			 pf_unlink_state(struct pf_state *, u_int);
+#define	PF_ENTER_LOCKED		0x00000001
+#define	PF_RETURN_LOCKED	0x00000002
+extern int			 pf_state_insert(struct pfi_kif *,
+				    struct pf_state_key *,
+				    struct pf_state_key *,
+				    struct pf_state *);
+extern void			 pf_free_state(struct pf_state *);
+
/* Take an additional reference on a state (see pf_release_state()). */
static __inline void
pf_ref_state(struct pf_state *s)
{

	refcount_acquire(&s->refs);
}
+
+static __inline int
+pf_release_state(struct pf_state *s)
+{
+
+	if (refcount_release(&s->refs)) {
+		pf_free_state(s);
+		return (1);
+	} else
+		return (0);
+}
+
+extern struct pf_state		*pf_find_state_byid(uint64_t, uint32_t);
+extern struct pf_state		*pf_find_state_all(struct pf_state_key_cmp *,
+				    u_int, int *);
+extern struct pf_src_node	*pf_find_src_node(struct pf_addr *,
+				    struct pf_rule *, sa_family_t, int);
+extern void			 pf_unlink_src_node(struct pf_src_node *);
+extern u_int			 pf_free_src_nodes(struct pf_src_node_list *);
+extern void			 pf_print_state(struct pf_state *);
+extern void			 pf_print_flags(u_int8_t);
+extern u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
+				    u_int8_t);
+extern u_int16_t		 pf_proto_cksum_fixup(struct mbuf *, u_int16_t,
+				    u_int16_t, u_int16_t, u_int8_t);
+
+VNET_DECLARE(struct ifnet *,		 sync_ifp);
+#define	V_sync_ifp		 	 VNET(sync_ifp);
+VNET_DECLARE(struct pf_rule,		 pf_default_rule);
+#define	V_pf_default_rule		  VNET(pf_default_rule)
+extern void			 pf_addrcpy(struct pf_addr *, struct pf_addr *,
+				    u_int8_t);
+void				pf_free_rule(struct pf_rule *);
+
/* per-family packet test / normalization entry points */
#ifdef INET
int	pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *);
int	pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
	    struct pf_pdesc *);
#endif /* INET */

#ifdef INET6
int	pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *);
int	pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
	    struct pf_pdesc *);
void	pf_poolmask(struct pf_addr *, struct pf_addr*,
	    struct pf_addr *, struct pf_addr *, u_int8_t);
void	pf_addr_inc(struct pf_addr *, sa_family_t);
int	pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *);
#endif /* INET6 */

/* header pulling, checksum-preserving field rewrites, matching helpers */
u_int32_t	pf_new_isn(struct pf_state *);
void   *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
	    sa_family_t);
void	pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
void	pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
	    u_int8_t);
void	pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t);
void	pf_send_deferred_syn(struct pf_state *);
int	pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
	    struct pf_addr *, sa_family_t);
int	pf_match_addr_range(struct pf_addr *, struct pf_addr *,
	    struct pf_addr *, sa_family_t);
int	pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);

/* scrub/normalization (pf_norm.c) */
void	pf_normalize_init(void);
void	pf_normalize_cleanup(void);
int	pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *,
	    struct pf_pdesc *);
void	pf_normalize_tcp_cleanup(struct pf_state *);
int	pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *,
	    struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *);
int	pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
	    u_short *, struct tcphdr *, struct pf_state *,
	    struct pf_state_peer *, struct pf_state_peer *, int *);
u_int32_t
	pf_state_expires(const struct pf_state *);
void	pf_purge_expired_fragments(void);
int	pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,
	    int);
int	pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *);
struct pf_state_key *pf_alloc_state_key(int);
/* table subsystem (pf_table.c) */
void	pfr_initialize(void);
void	pfr_cleanup(void);
int	pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
void	pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
	    u_int64_t, int, int, int);
int	pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t);
void	pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
struct pfr_ktable *
	pfr_attach_table(struct pf_ruleset *, char *);
void	pfr_detach_table(struct pfr_ktable *);
int	pfr_clr_tables(struct pfr_table *, int *, int);
int	pfr_add_tables(struct pfr_table *, int, int *, int);
int	pfr_del_tables(struct pfr_table *, int, int *, int);
int	pfr_table_count(struct pfr_table *, int);
int	pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int);
int	pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int);
int	pfr_clr_tstats(struct pfr_table *, int, int *, int);
int	pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int);
int	pfr_clr_addrs(struct pfr_table *, int *, int);
int	pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long);
int	pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
	    int);
int	pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
	    int);
int	pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
	    int *, int *, int *, int, u_int32_t);
int	pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int);
int	pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int);
int	pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *,
	    int);
int	pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
	    int);
int	pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int);
int	pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int);
int	pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int);
int	pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *,
	    int *, u_int32_t, int);
+MALLOC_DECLARE(PFI_MTYPE);
+VNET_DECLARE(struct pfi_kif *,		 pfi_all);
+#define	V_pfi_all	 		 VNET(pfi_all)
+
+void		 pfi_initialize(void);
+void		 pfi_cleanup(void);
+void		 pfi_kif_ref(struct pfi_kif *);
+void		 pfi_kif_unref(struct pfi_kif *);
+struct pfi_kif	*pfi_kif_find(const char *);
+struct pfi_kif	*pfi_kif_attach(struct pfi_kif *, const char *);
+int		 pfi_kif_match(struct pfi_kif *, struct pfi_kif *);
+void		 pfi_kif_purge(void);
+int		 pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *,
+		    sa_family_t);
+int		 pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t);
+void		 pfi_dynaddr_remove(struct pfi_dynaddr *);
+void		 pfi_dynaddr_copyout(struct pf_addr_wrap *);
+void		 pfi_update_status(const char *, struct pf_status *);
+void		 pfi_get_ifaces(const char *, struct pfi_kif *, int *);
+int		 pfi_set_flags(const char *, int);
+int		 pfi_clear_flags(const char *, int);
+
+int		 pf_match_tag(struct mbuf *, struct pf_rule *, int *, int);
+int		 pf_tag_packet(struct mbuf *, struct pf_pdesc *, int);
+int		 pf_addr_cmp(struct pf_addr *, struct pf_addr *,
+		    sa_family_t);
+void		 pf_qid2qname(u_int32_t, char *);
+
+VNET_DECLARE(struct pf_kstatus, pf_status);
+#define	V_pf_status	VNET(pf_status)
+
+struct pf_limit {
+	uma_zone_t	zone;
+	u_int		limit;
+};
+VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
+#define	V_pf_limits VNET(pf_limits)
+
+#endif /* _KERNEL */
+
+#ifdef _KERNEL
+VNET_DECLARE(struct pf_anchor_global,		 pf_anchors);
+#define	V_pf_anchors				 VNET(pf_anchors)
+VNET_DECLARE(struct pf_anchor,			 pf_main_anchor);
+#define	V_pf_main_anchor			 VNET(pf_main_anchor)
+#define pf_main_ruleset	V_pf_main_anchor.ruleset
+#endif
+
+/* these ruleset functions can be linked into userland programs (pfctl) */
+int			 pf_get_ruleset_number(u_int8_t);
+void			 pf_init_ruleset(struct pf_ruleset *);
+int			 pf_anchor_setup(struct pf_rule *,
+			    const struct pf_ruleset *, const char *);
+int			 pf_anchor_copyout(const struct pf_ruleset *,
+			    const struct pf_rule *, struct pfioc_rule *);
+void			 pf_anchor_remove(struct pf_rule *);
+void			 pf_remove_if_empty_ruleset(struct pf_ruleset *);
+struct pf_ruleset	*pf_find_ruleset(const char *);
+struct pf_ruleset	*pf_find_or_create_ruleset(const char *);
+void			 pf_rs_initialize(void);
+
+/* The fingerprint functions can be linked into userland programs (tcpdump) */
+int	pf_osfp_add(struct pf_osfp_ioctl *);
+#ifdef _KERNEL
+struct pf_osfp_enlist *
+	pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int,
+	    const struct tcphdr *);
+#endif /* _KERNEL */
+void	pf_osfp_flush(void);
+int	pf_osfp_get(struct pf_osfp_ioctl *);
+int	pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
+
+#ifdef _KERNEL
+void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
+
+void			 pf_step_into_anchor(struct pf_anchor_stackframe *, int *,
+			    struct pf_ruleset **, int, struct pf_rule **,
+			    struct pf_rule **, int *);
+int			 pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *,
+			    struct pf_ruleset **, int, struct pf_rule **,
+			    struct pf_rule **, int *);
+
+int			 pf_map_addr(u_int8_t, struct pf_rule *,
+			    struct pf_addr *, struct pf_addr *,
+			    struct pf_addr *, struct pf_src_node **);
+struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
+			    int, int, struct pfi_kif *, struct pf_src_node **,
+			    struct pf_state_key **, struct pf_state_key **,
+			    struct pf_addr *, struct pf_addr *,
+			    uint16_t, uint16_t, struct pf_anchor_stackframe *);
+
+struct pf_state_key	*pf_state_key_setup(struct pf_pdesc *, struct pf_addr *,
+			    struct pf_addr *, u_int16_t, u_int16_t);
+struct pf_state_key	*pf_state_key_clone(struct pf_state_key *);
+#endif /* _KERNEL */
+
+#endif /* _NET_PFVAR_H_ */


Property changes on: trunk/sys/net/pfvar.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/ppp_defs.h
===================================================================
--- trunk/sys/net/ppp_defs.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ppp_defs.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,12 +26,14 @@
  * OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
  * OR MODIFICATIONS.
  *
- * $FreeBSD: stable/9/sys/net/ppp_defs.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/ppp_defs.h 250887 2013-05-21 21:20:10Z ed $
  */
 
 #ifndef _PPP_DEFS_H_
 #define _PPP_DEFS_H_
 
+#include <sys/_types.h>
+
 /*
  * The basic PPP frame.
  */
@@ -84,7 +86,7 @@
 /*
  * Extended asyncmap - allows any character to be escaped.
  */
-typedef u_int32_t	ext_accm[8];
+typedef __uint32_t	ext_accm[8];
 
 /*
  * What to do with network protocol (NP) packets.
@@ -144,8 +146,8 @@
  * the last NP packet was sent or received.
  */
 struct ppp_idle {
-    time_t xmit_idle;		/* time since last NP packet sent */
-    time_t recv_idle;		/* time since last NP packet received */
+    __time_t xmit_idle;		/* time since last NP packet sent */
+    __time_t recv_idle;		/* time since last NP packet received */
 };
 
 #ifndef __P

Modified: trunk/sys/net/radix.c
===================================================================
--- trunk/sys/net/radix.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)radix.c	8.5 (Berkeley) 5/19/95
- * $FreeBSD: stable/9/sys/net/radix.c 210122 2010-07-15 14:41:59Z luigi $
+ * $FreeBSD: stable/10/sys/net/radix.c 273185 2014-10-16 20:46:02Z glebius $
  */
 
 /*
@@ -67,28 +67,28 @@
 	 *rn_search(void *, struct radix_node *),
 	 *rn_search_m(void *, struct radix_node *, void *);
 
-static int	max_keylen;
-static struct radix_mask *rn_mkfreelist;
-static struct radix_node_head *mask_rnhead;
+static void rn_detachhead_internal(void **head);
+static int rn_inithead_internal(void **head, int off);
+
+#define	RADIX_MAX_KEY_LEN	32
+
+static char rn_zeros[RADIX_MAX_KEY_LEN];
+static char rn_ones[RADIX_MAX_KEY_LEN] = {
+	-1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
- * Work area -- the following point to 3 buffers of size max_keylen,
- * allocated in this order in a block of memory malloc'ed by rn_init.
- * rn_zeros, rn_ones are set in rn_init and used in readonly afterwards.
- * addmask_key is used in rn_addmask in rw mode and not thread-safe.
+ * XXX: Compat stuff for old rn_addmask() users
  */
-static char *rn_zeros, *rn_ones, *addmask_key;
+static struct radix_node_head *mask_rnhead_compat;
+#ifdef	_KERNEL
+static struct mtx mask_mtx;
+#endif
 
-#define MKGet(m) {						\
-	if (rn_mkfreelist) {					\
-		m = rn_mkfreelist;				\
-		rn_mkfreelist = (m)->rm_mklist;			\
-	} else							\
-		R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); }
- 
-#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
 
-#define rn_masktop (mask_rnhead->rnh_treetop)
-
 static int	rn_lexobetter(void *m_arg, void *n_arg);
 static struct radix_mask *
 		rn_new_radix_mask(struct radix_node *tt,
@@ -157,12 +157,10 @@
  * Search a node in the tree matching the key.
  */
 static struct radix_node *
-rn_search(v_arg, head)
-	void *v_arg;
-	struct radix_node *head;
+rn_search(void *v_arg, struct radix_node *head)
 {
-	register struct radix_node *x;
-	register caddr_t v;
+	struct radix_node *x;
+	caddr_t v;
 
 	for (x = head, v = v_arg; x->rn_bit >= 0;) {
 		if (x->rn_bmask & v[x->rn_offset])
@@ -178,12 +176,10 @@
  * XXX note this function is used only once.
  */
 static struct radix_node *
-rn_search_m(v_arg, head, m_arg)
-	struct radix_node *head;
-	void *v_arg, *m_arg;
+rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
 {
-	register struct radix_node *x;
-	register caddr_t v = v_arg, m = m_arg;
+	struct radix_node *x;
+	caddr_t v = v_arg, m = m_arg;
 
 	for (x = head; x->rn_bit >= 0;) {
 		if ((x->rn_bmask & m[x->rn_offset]) &&
@@ -192,15 +188,14 @@
 		else
 			x = x->rn_left;
 	}
-	return x;
+	return (x);
 }
 
 int
-rn_refines(m_arg, n_arg)
-	void *m_arg, *n_arg;
+rn_refines(void *m_arg, void *n_arg)
 {
-	register caddr_t m = m_arg, n = n_arg;
-	register caddr_t lim, lim2 = lim = n + LEN(n);
+	caddr_t m = m_arg, n = n_arg;
+	caddr_t lim, lim2 = lim = n + LEN(n);
 	int longer = LEN(n++) - LEN(m++);
 	int masks_are_equal = 1;
 
@@ -208,49 +203,71 @@
 		lim -= longer;
 	while (n < lim) {
 		if (*n & ~(*m))
-			return 0;
+			return (0);
 		if (*n++ != *m++)
 			masks_are_equal = 0;
 	}
 	while (n < lim2)
 		if (*n++)
-			return 0;
+			return (0);
 	if (masks_are_equal && (longer < 0))
 		for (lim2 = m - longer; m < lim2; )
 			if (*m++)
-				return 1;
+				return (1);
 	return (!masks_are_equal);
 }
 
+/*
+ * Search for exact match in given @head.
+ * Assume host bits are cleared in @v_arg if @m_arg is not NULL
+ * Note that prefixes with /32 or /128 masks are treated differently
+ * from host routes.
+ */
 struct radix_node *
-rn_lookup(v_arg, m_arg, head)
-	void *v_arg, *m_arg;
-	struct radix_node_head *head;
+rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
 {
-	register struct radix_node *x;
-	caddr_t netmask = 0;
+	struct radix_node *x;
+	caddr_t netmask;
 
-	if (m_arg) {
-		x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
-		if (x == 0)
-			return (0);
+	if (m_arg != NULL) {
+		/*
+		 * Most common case: search exact prefix/mask
+		 */
+		x = rn_addmask_r(m_arg, head->rnh_masks, 1,
+		    head->rnh_treetop->rn_offset);
+		if (x == NULL)
+			return (NULL);
 		netmask = x->rn_key;
-	}
-	x = rn_match(v_arg, head);
-	if (x && netmask) {
-		while (x && x->rn_mask != netmask)
+
+		x = rn_match(v_arg, head);
+
+		while (x != NULL && x->rn_mask != netmask)
 			x = x->rn_dupedkey;
+
+		return (x);
 	}
-	return x;
+
+	/*
+	 * Search for host address.
+	 */
+	if ((x = rn_match(v_arg, head)) == NULL)
+		return (NULL);
+
+	/* Check if found key is the same */
+	if (LEN(x->rn_key) != LEN(v_arg) || bcmp(x->rn_key, v_arg, LEN(v_arg)))
+		return (NULL);
+
+	/* Check if this is not host route */
+	if (x->rn_mask != NULL)
+		return (NULL);
+
+	return (x);
 }
 
 static int
-rn_satisfies_leaf(trial, leaf, skip)
-	char *trial;
-	register struct radix_node *leaf;
-	int skip;
+rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip)
 {
-	register char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
+	char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
 	char *cplim;
 	int length = min(LEN(cp), LEN(cp2));
 
@@ -261,22 +278,23 @@
 	cplim = cp + length; cp3 += skip; cp2 += skip;
 	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
 		if ((*cp ^ *cp2) & *cp3)
-			return 0;
-	return 1;
+			return (0);
+	return (1);
 }
 
+/*
+ * Search for longest-prefix match in given @head
+ */
 struct radix_node *
-rn_match(v_arg, head)
-	void *v_arg;
-	struct radix_node_head *head;
+rn_match(void *v_arg, struct radix_node_head *head)
 {
 	caddr_t v = v_arg;
-	register struct radix_node *t = head->rnh_treetop, *x;
-	register caddr_t cp = v, cp2;
+	struct radix_node *t = head->rnh_treetop, *x;
+	caddr_t cp = v, cp2;
 	caddr_t cplim;
 	struct radix_node *saved_t, *top = t;
 	int off = t->rn_offset, vlen = LEN(cp), matched_off;
-	register int test, b, rn_bit;
+	int test, b, rn_bit;
 
 	/*
 	 * Open code rn_search(v, top) to avoid overhead of extra
@@ -314,7 +332,7 @@
 	 */
 	if (t->rn_flags & RNF_ROOT)
 		t = t->rn_dupedkey;
-	return t;
+	return (t);
 on1:
 	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
 	for (b = 7; (test >>= 1) > 0;)
@@ -335,13 +353,13 @@
 		 */
 		if (t->rn_flags & RNF_NORMAL) {
 			if (rn_bit <= t->rn_bit)
-				return t;
+				return (t);
 		} else if (rn_satisfies_leaf(v, t, matched_off))
-				return t;
+				return (t);
 	t = saved_t;
 	/* start searching up the tree */
 	do {
-		register struct radix_mask *m;
+		struct radix_mask *m;
 		t = t->rn_parent;
 		m = t->rn_mklist;
 		/*
@@ -360,12 +378,12 @@
 				while (x && x->rn_mask != m->rm_mask)
 					x = x->rn_dupedkey;
 				if (x && rn_satisfies_leaf(v, x, off))
-					return x;
+					return (x);
 			}
 			m = m->rm_mklist;
 		}
 	} while (t != top);
-	return 0;
+	return (0);
 }
 
 #ifdef RN_DEBUG
@@ -387,12 +405,9 @@
  */
 
 static struct radix_node *
-rn_newpair(v, b, nodes)
-	void *v;
-	int b;
-	struct radix_node nodes[2];
+rn_newpair(void *v, int b, struct radix_node nodes[2])
 {
-	register struct radix_node *tt = nodes, *t = tt + 1;
+	struct radix_node *tt = nodes, *t = tt + 1;
 	t->rn_bit = b;
 	t->rn_bmask = 0x80 >> (b & 7);
 	t->rn_left = tt;
@@ -416,29 +431,25 @@
 	tt->rn_ybro = rn_clist;
 	rn_clist = tt;
 #endif
-	return t;
+	return (t);
 }
 
 static struct radix_node *
-rn_insert(v_arg, head, dupentry, nodes)
-	void *v_arg;
-	struct radix_node_head *head;
-	int *dupentry;
-	struct radix_node nodes[2];
+rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
+    struct radix_node nodes[2])
 {
 	caddr_t v = v_arg;
 	struct radix_node *top = head->rnh_treetop;
 	int head_off = top->rn_offset, vlen = LEN(v);
-	register struct radix_node *t = rn_search(v_arg, top);
-	register caddr_t cp = v + head_off;
-	register int b;
-	struct radix_node *tt;
+	struct radix_node *t = rn_search(v_arg, top);
+	caddr_t cp = v + head_off;
+	int b;
+	struct radix_node *p, *tt, *x;
     	/*
 	 * Find first bit at which v and t->rn_key differ
 	 */
-    {
-	register caddr_t cp2 = t->rn_key + head_off;
-	register int cmp_res;
+	caddr_t cp2 = t->rn_key + head_off;
+	int cmp_res;
 	caddr_t cplim = v + vlen;
 
 	while (cp < cplim)
@@ -445,15 +456,14 @@
 		if (*cp2++ != *cp++)
 			goto on1;
 	*dupentry = 1;
-	return t;
+	return (t);
 on1:
 	*dupentry = 0;
 	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
 	for (b = (cp - v) << 3; cmp_res; b--)
 		cmp_res >>= 1;
-    }
-    {
-	register struct radix_node *p, *x = top;
+
+	x = top;
 	cp = v;
 	do {
 		p = x;
@@ -485,33 +495,31 @@
 	if (rn_debug)
 		log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
 #endif
-    }
 	return (tt);
 }
 
 struct radix_node *
-rn_addmask(n_arg, search, skip)
-	int search, skip;
-	void *n_arg;
+rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
 {
-	caddr_t netmask = (caddr_t)n_arg;
-	register struct radix_node *x;
-	register caddr_t cp, cplim;
-	register int b = 0, mlen, j;
-	int maskduplicated, m0, isnormal;
+	unsigned char *netmask = arg;
+	unsigned char *cp, *cplim;
+	struct radix_node *x;
+	int b = 0, mlen, j;
+	int maskduplicated, isnormal;
 	struct radix_node *saved_x;
-	static int last_zeroed = 0;
+	unsigned char addmask_key[RADIX_MAX_KEY_LEN];
 
-	if ((mlen = LEN(netmask)) > max_keylen)
-		mlen = max_keylen;
+	if ((mlen = LEN(netmask)) > RADIX_MAX_KEY_LEN)
+		mlen = RADIX_MAX_KEY_LEN;
 	if (skip == 0)
 		skip = 1;
 	if (mlen <= skip)
-		return (mask_rnhead->rnh_nodes);
+		return (maskhead->rnh_nodes);
+
+	bzero(addmask_key, RADIX_MAX_KEY_LEN);
 	if (skip > 1)
 		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
-	if ((m0 = mlen) > skip)
-		bcopy(netmask + skip, addmask_key + skip, mlen - skip);
+	bcopy(netmask + skip, addmask_key + skip, mlen - skip);
 	/*
 	 * Trim trailing zeroes.
 	 */
@@ -518,25 +526,20 @@
 	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
 		cp--;
 	mlen = cp - addmask_key;
-	if (mlen <= skip) {
-		if (m0 >= last_zeroed)
-			last_zeroed = mlen;
-		return (mask_rnhead->rnh_nodes);
-	}
-	if (m0 < last_zeroed)
-		bzero(addmask_key + m0, last_zeroed - m0);
-	*addmask_key = last_zeroed = mlen;
-	x = rn_search(addmask_key, rn_masktop);
+	if (mlen <= skip)
+		return (maskhead->rnh_nodes);
+	*addmask_key = mlen;
+	x = rn_search(addmask_key, maskhead->rnh_treetop);
 	if (bcmp(addmask_key, x->rn_key, mlen) != 0)
 		x = 0;
 	if (x || search)
 		return (x);
-	R_Zalloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
+	R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x));
 	if ((saved_x = x) == 0)
 		return (0);
 	netmask = cp = (caddr_t)(x + 2);
 	bcopy(addmask_key, cp, mlen);
-	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
+	x = rn_insert(cp, maskhead, &maskduplicated, x);
 	if (maskduplicated) {
 		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
 		Free(saved_x);
@@ -546,20 +549,18 @@
 	 * Calculate index of mask, and check for normalcy.
 	 * First find the first byte with a 0 bit, then if there are
 	 * more bits left (remember we already trimmed the trailing 0's),
-	 * the pattern must be one of those in normal_chars[], or we have
+	 * the bits should be contiguous, otherwise we have got
 	 * a non-contiguous mask.
 	 */
+#define	CONTIG(_c)	(((~(_c) + 1) & (_c)) == (unsigned char)(~(_c) + 1))
 	cplim = netmask + mlen;
 	isnormal = 1;
 	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
 		cp++;
 	if (cp != cplim) {
-		static char normal_chars[] = {
-			0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
-
 		for (j = 0x80; (j & *cp) != 0; j >>= 1)
 			b++;
-		if (*cp != normal_chars[b] || cp != (cplim - 1))
+		if (!CONTIG(*cp) || cp != (cplim - 1))
 			isnormal = 0;
 	}
 	b += (cp - netmask) << 3;
@@ -569,34 +570,48 @@
 	return (x);
 }
 
+struct radix_node *
+rn_addmask(void *n_arg, int search, int skip)
+{
+	struct radix_node *tt;
+
+#ifdef _KERNEL
+	mtx_lock(&mask_mtx);
+#endif
+	tt = rn_addmask_r(n_arg, mask_rnhead_compat, search, skip);
+
+#ifdef _KERNEL
+	mtx_unlock(&mask_mtx);
+#endif
+
+	return (tt);
+}
+
 static int	/* XXX: arbitrary ordering for non-contiguous masks */
-rn_lexobetter(m_arg, n_arg)
-	void *m_arg, *n_arg;
+rn_lexobetter(void *m_arg, void *n_arg)
 {
-	register u_char *mp = m_arg, *np = n_arg, *lim;
+	u_char *mp = m_arg, *np = n_arg, *lim;
 
 	if (LEN(mp) > LEN(np))
-		return 1;  /* not really, but need to check longer one first */
+		return (1);  /* not really, but need to check longer one first */
 	if (LEN(mp) == LEN(np))
 		for (lim = mp + LEN(mp); mp < lim;)
 			if (*mp++ > *np++)
-				return 1;
-	return 0;
+				return (1);
+	return (0);
 }
 
 static struct radix_mask *
-rn_new_radix_mask(tt, next)
-	register struct radix_node *tt;
-	register struct radix_mask *next;
+rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
 {
-	register struct radix_mask *m;
+	struct radix_mask *m;
 
-	MKGet(m);
-	if (m == 0) {
-		log(LOG_ERR, "Mask for route not entered\n");
+	R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask));
+	if (m == NULL) {
+		log(LOG_ERR, "Failed to allocate route mask\n");
 		return (0);
 	}
-	bzero(m, sizeof *m);
+	bzero(m, sizeof(*m));
 	m->rm_bit = tt->rn_bit;
 	m->rm_flags = tt->rn_flags;
 	if (tt->rn_flags & RNF_NORMAL)
@@ -605,17 +620,15 @@
 		m->rm_mask = tt->rn_mask;
 	m->rm_mklist = next;
 	tt->rn_mklist = m;
-	return m;
+	return (m);
 }
 
 struct radix_node *
-rn_addroute(v_arg, n_arg, head, treenodes)
-	void *v_arg, *n_arg;
-	struct radix_node_head *head;
-	struct radix_node treenodes[2];
+rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+    struct radix_node treenodes[2])
 {
 	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
-	register struct radix_node *t, *x = 0, *tt;
+	struct radix_node *t, *x = 0, *tt;
 	struct radix_node *saved_tt, *top = head->rnh_treetop;
 	short b = 0, b_leaf = 0;
 	int keyduplicated;
@@ -630,7 +643,8 @@
 	 * nodes and possibly save time in calculating indices.
 	 */
 	if (netmask)  {
-		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
+		x = rn_addmask_r(netmask, head->rnh_masks, 0, top->rn_offset);
+		if (x == NULL)
 			return (0);
 		b_leaf = x->rn_bit;
 		b = -1 - x->rn_bit;
@@ -742,7 +756,7 @@
 on2:
 	/* Add new route to highest possible ancestor's list */
 	if ((netmask == 0) || (b > t->rn_bit ))
-		return tt; /* can't lift at all */
+		return (tt); /* can't lift at all */
 	b_leaf = tt->rn_bit;
 	do {
 		x = t;
@@ -766,7 +780,7 @@
 			    log(LOG_ERR,
 			        "Non-unique normal route, mask not entered\n");
 #endif
-				return tt;
+				return (tt);
 			}
 		} else
 			mmask = m->rm_mask;
@@ -773,7 +787,7 @@
 		if (mmask == netmask) {
 			m->rm_refs++;
 			tt->rn_mklist = m;
-			return tt;
+			return (tt);
 		}
 		if (rn_refines(netmask, mmask)
 		    || rn_lexobetter(netmask, mmask))
@@ -780,15 +794,13 @@
 			break;
 	}
 	*mp = rn_new_radix_mask(tt, *mp);
-	return tt;
+	return (tt);
 }
 
 struct radix_node *
-rn_delete(v_arg, netmask_arg, head)
-	void *v_arg, *netmask_arg;
-	struct radix_node_head *head;
+rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
 {
-	register struct radix_node *t, *p, *x, *tt;
+	struct radix_node *t, *p, *x, *tt;
 	struct radix_mask *m, *saved_m, **mp;
 	struct radix_node *dupedkey, *saved_tt, *top;
 	caddr_t v, netmask;
@@ -809,7 +821,8 @@
 	 * Delete our route from mask lists.
 	 */
 	if (netmask) {
-		if ((x = rn_addmask(netmask, 1, head_off)) == 0)
+		x = rn_addmask_r(netmask, head->rnh_masks, 1, head_off);
+		if (x == NULL)
 			return (0);
 		netmask = x->rn_key;
 		while (tt->rn_mask != netmask)
@@ -821,7 +834,7 @@
 	if (tt->rn_flags & RNF_NORMAL) {
 		if (m->rm_leaf != tt || m->rm_refs > 0) {
 			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
-			return 0;  /* dangling ref could cause disaster */
+			return (0);  /* dangling ref could cause disaster */
 		}
 	} else {
 		if (m->rm_mask != tt->rn_mask) {
@@ -842,7 +855,7 @@
 	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
 		if (m == saved_m) {
 			*mp = m->rm_mklist;
-			MKFree(m);
+			Free(m);
 			break;
 		}
 	if (m == 0) {
@@ -933,7 +946,7 @@
 					struct radix_mask *mm = m->rm_mklist;
 					x->rn_mklist = 0;
 					if (--(m->rm_refs) < 0)
-						MKFree(m);
+						Free(m);
 					m = mm;
 				}
 			if (m)
@@ -973,17 +986,14 @@
  * exit.
  */
 static int
-rn_walktree_from(h, a, m, f, w)
-	struct radix_node_head *h;
-	void *a, *m;
-	walktree_f_t *f;
-	void *w;
+rn_walktree_from(struct radix_node_head *h, void *a, void *m,
+    walktree_f_t *f, void *w)
 {
 	int error;
 	struct radix_node *base, *next;
 	u_char *xa = (u_char *)a;
 	u_char *xm = (u_char *)m;
-	register struct radix_node *rn, *last = 0 /* shut up gcc */;
+	struct radix_node *rn, *last = NULL; /* shut up gcc */
 	int stopping = 0;
 	int lastb;
 
@@ -1076,18 +1086,15 @@
 		}
 
 	}
-	return 0;
+	return (0);
 }
 
 static int
-rn_walktree(h, f, w)
-	struct radix_node_head *h;
-	walktree_f_t *f;
-	void *w;
+rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
 {
 	int error;
 	struct radix_node *base, *next;
-	register struct radix_node *rn = h->rnh_treetop;
+	struct radix_node *rn = h->rnh_treetop;
 	/*
 	 * This gets complicated because we may delete the node
 	 * while applying the function f to it, so we need to calculate
@@ -1129,13 +1136,11 @@
  * bits starting at 'off'.
  * Return 1 on success, 0 on error.
  */
-int
-rn_inithead(head, off)
-	void **head;
-	int off;
+static int
+rn_inithead_internal(void **head, int off)
 {
-	register struct radix_node_head *rnh;
-	register struct radix_node *t, *tt, *ttt;
+	struct radix_node_head *rnh;
+	struct radix_node *t, *tt, *ttt;
 	if (*head)
 		return (1);
 	R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
@@ -1164,8 +1169,8 @@
 	return (1);
 }
 
-int
-rn_detachhead(void **head)
+static void
+rn_detachhead_internal(void **head)
 {
 	struct radix_node_head *rnh;
 
@@ -1177,28 +1182,73 @@
 	Free(rnh);
 
 	*head = NULL;
+}
+
+int
+rn_inithead(void **head, int off)
+{
+	struct radix_node_head *rnh;
+
+	if (*head != NULL)
+		return (1);
+
+	if (rn_inithead_internal(head, off) == 0)
+		return (0);
+
+	rnh = (struct radix_node_head *)(*head);
+
+	if (rn_inithead_internal((void **)&rnh->rnh_masks, 0) == 0) {
+		rn_detachhead_internal(head);
+		return (0);
+	}
+
 	return (1);
 }
 
+static int
+rn_freeentry(struct radix_node *rn, void *arg)
+{
+	struct radix_node_head * const rnh = arg;
+	struct radix_node *x;
+
+	x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
+	if (x != NULL)
+		Free(x);
+	return (0);
+}
+
+int
+rn_detachhead(void **head)
+{
+	struct radix_node_head *rnh;
+
+	KASSERT((head != NULL && *head != NULL),
+	    ("%s: head already freed", __func__));
+
+	rnh = *head;
+
+	rn_walktree(rnh->rnh_masks, rn_freeentry, rnh->rnh_masks);
+	rn_detachhead_internal((void **)&rnh->rnh_masks);
+	rn_detachhead_internal(head);
+	return (1);
+}
+
 void
 rn_init(int maxk)
 {
-	char *cp, *cplim;
-
-	max_keylen = maxk;
-	if (max_keylen == 0) {
+	if ((maxk <= 0) || (maxk > RADIX_MAX_KEY_LEN)) {
 		log(LOG_ERR,
-		    "rn_init: radix functions require max_keylen be set\n");
+		    "rn_init: max_keylen must be within 1..%d\n",
+		    RADIX_MAX_KEY_LEN);
 		return;
 	}
-	R_Malloc(rn_zeros, char *, 3 * max_keylen);
-	if (rn_zeros == NULL)
-		panic("rn_init");
-	bzero(rn_zeros, 3 * max_keylen);
-	rn_ones = cp = rn_zeros + max_keylen;
-	addmask_key = cplim = rn_ones + max_keylen;
-	while (cp < cplim)
-		*cp++ = -1;
-	if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
+
+	/*
+	 * XXX: Compat for old rn_addmask() users
+	 */
+	if (rn_inithead((void **)(void *)&mask_rnhead_compat, 0) == 0)
 		panic("rn_init 2");
+#ifdef _KERNEL
+	mtx_init(&mask_mtx, "radix_mask", NULL, MTX_DEF);
+#endif
 }

Modified: trunk/sys/net/radix.h
===================================================================
--- trunk/sys/net/radix.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)radix.h	8.2 (Berkeley) 10/31/94
- * $FreeBSD: stable/9/sys/net/radix.h 225698 2011-09-20 20:27:26Z kmacy $
+ * $FreeBSD: stable/10/sys/net/radix.h 265708 2014-05-08 20:27:06Z melifaro $
  */
 
 #ifndef _RADIX_H_
@@ -120,9 +120,9 @@
 		(void *v, void *mask, struct radix_node_head *head);
 	struct	radix_node *(*rnh_delpkt)	/* remove based on packet hdr */
 		(void *v, void *mask, struct radix_node_head *head);
-	struct	radix_node *(*rnh_matchaddr)	/* locate based on sockaddr */
+	struct	radix_node *(*rnh_matchaddr)	/* longest match for sockaddr */
 		(void *v, struct radix_node_head *head);
-	struct	radix_node *(*rnh_lookup)	/* locate based on sockaddr */
+	struct	radix_node *(*rnh_lookup)	/* exact match for sockaddr */
 		(void *v, void *mask, struct radix_node_head *head);
 	struct	radix_node *(*rnh_matchpkt)	/* locate based on packet hdr */
 		(void *v, struct radix_node_head *head);
@@ -137,6 +137,7 @@
 #ifdef _KERNEL
 	struct	rwlock rnh_lock;		/* locks entire radix tree */
 #endif
+	struct	radix_node_head *rnh_masks;	/* Storage for our masks */
 };
 
 #ifndef _KERNEL
@@ -168,6 +169,7 @@
 int	 rn_refines(void *, void *);
 struct radix_node
 	 *rn_addmask(void *, int, int),
+	 *rn_addmask_r(void *, struct radix_node_head *, int, int),
 	 *rn_addroute (void *, void *, struct radix_node_head *,
 			struct radix_node [2]),
 	 *rn_delete(void *, void *, struct radix_node_head *),

Modified: trunk/sys/net/radix_mpath.c
===================================================================
--- trunk/sys/net/radix_mpath.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix_mpath.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/radix_mpath.c 225163 2011-08-25 04:31:20Z qingli $");
+__FBSDID("$FreeBSD: stable/10/sys/net/radix_mpath.c 265711 2014-05-08 20:41:39Z melifaro $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -86,7 +86,7 @@
 	
 	while (rn != NULL) {
 		rt = (struct rtentry *)rn;
-		i += rt->rt_rmx.rmx_weight;
+		i += rt->rt_weight;
 		rn = rn_mpath_next(rn);
 	}
 	return (i);
@@ -113,11 +113,16 @@
 		if (rt->rt_gateway->sa_family == AF_LINK) {
 			if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len))
 				break;
-		} else {
-			if (rt->rt_gateway->sa_len == gate->sa_len &&
-			    !memcmp(rt->rt_gateway, gate, gate->sa_len))
-				break;
 		}
+
+		/*
+		 * Check for other options:
+		 * 1) Routes with 'real' IPv4/IPv6 gateway
+		 * 2) Loopback host routes (another AF_LINK/sockadd_dl check)
+		 * */
+		if (rt->rt_gateway->sa_len == gate->sa_len &&
+		    !memcmp(rt->rt_gateway, gate, gate->sa_len))
+			break;
 	} while ((rn = rn_mpath_next(rn)) != NULL);
 
 	return (struct rtentry *)rn;
@@ -152,6 +157,7 @@
 
 /*
  * check if we have the same key/mask/gateway on the table already.
+ * Assume @rt rt_key host bits are cleared according to @netmask
  */
 int
 rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
@@ -159,76 +165,13 @@
 {
 	struct radix_node *rn, *rn1;
 	struct rtentry *rt1;
-	char *p, *q, *eq;
-	int same, l, skip;
 
 	rn = (struct radix_node *)rt;
 	rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
 	if (!rn1 || rn1->rn_flags & RNF_ROOT)
-		return 0;
+		return (0);
 
-	/*
-	 * unlike other functions we have in this file, we have to check
-	 * all key/mask/gateway as rnh_lookup can match less specific entry.
-	 */
-	rt1 = (struct rtentry *)rn1;
-
-	/* compare key. */
-	if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
-	    bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
-		goto different;
-
-	/* key was the same.  compare netmask.  hairy... */
-	if (rt_mask(rt1) && netmask) {
-		skip = rnh->rnh_treetop->rn_offset;
-		if (rt_mask(rt1)->sa_len > netmask->sa_len) {
-			/*
-			 * as rt_mask(rt1) is made optimal by radix.c,
-			 * there must be some 1-bits on rt_mask(rt1)
-			 * after netmask->sa_len.  therefore, in
-			 * this case, the entries are different.
-			 */
-			if (rt_mask(rt1)->sa_len > skip)
-				goto different;
-			else {
-				/* no bits to compare, i.e. same*/
-				goto maskmatched;
-			}
-		}
-
-		l = rt_mask(rt1)->sa_len;
-		if (skip > l) {
-			/* no bits to compare, i.e. same */
-			goto maskmatched;
-		}
-		p = (char *)rt_mask(rt1);
-		q = (char *)netmask;
-		if (bcmp(p + skip, q + skip, l - skip))
-			goto different;
-		/*
-		 * need to go through all the bit, as netmask is not
-		 * optimal and can contain trailing 0s
-		 */
-		eq = (char *)netmask + netmask->sa_len;
-		q += l;
-		same = 1;
-		while (eq > q)
-			if (*q++) {
-				same = 0;
-				break;
-			}
-		if (!same)
-			goto different;
-	} else if (!rt_mask(rt1) && !netmask)
-		; /* no mask to compare, i.e. same */
-	else {
-		/* one has mask and the other does not, different */
-		goto different;
-	}
-
-maskmatched:
-
-	/* key/mask were the same.  compare gateway for all multipaths */
+	/* key/mask are the same. compare gateway for all multipaths */
 	do {
 		rt1 = (struct rtentry *)rn1;
 
@@ -249,11 +192,10 @@
 		}
 
 		/* all key/mask/gateway are the same.  conflicting entry. */
-		return EEXIST;
+		return (EEXIST);
 	} while ((rn1 = rn_mpath_next(rn1)) != NULL);
 
-different:
-	return 0;
+	return (0);
 }
 
 void
@@ -289,8 +231,8 @@
 	hash += hashjitter;
 	hash %= n;
 	for (weight = abs((int32_t)hash), rt = ro->ro_rt;
-	     weight >= rt->rt_rmx.rmx_weight && rn; 
-	     weight -= rt->rt_rmx.rmx_weight) {
+	     weight >= rt->rt_weight && rn; 
+	     weight -= rt->rt_weight) {
 		
 		/* stay within the multipath routes */
 		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)

Modified: trunk/sys/net/radix_mpath.h
===================================================================
--- trunk/sys/net/radix_mpath.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix_mpath.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -33,7 +33,7 @@
  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
  * PROPERTIES.
  */
-/* $FreeBSD: stable/9/sys/net/radix_mpath.h 179426 2008-05-30 09:34:35Z qingli $ */
+/* $FreeBSD: stable/10/sys/net/radix_mpath.h 179426 2008-05-30 09:34:35Z qingli $ */
 
 #ifndef _NET_RADIX_MPATH_H_
 #define	_NET_RADIX_MPATH_H_

Modified: trunk/sys/net/raw_cb.c
===================================================================
--- trunk/sys/net/raw_cb.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/raw_cb.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)raw_cb.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/raw_cb.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/raw_cb.c 227309 2011-11-07 15:43:11Z ed $
  */
 
 #include <sys/param.h>

Modified: trunk/sys/net/raw_cb.h
===================================================================
--- trunk/sys/net/raw_cb.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/raw_cb.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)raw_cb.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/raw_cb.h 225970 2011-10-04 11:35:18Z bz $
+ * $FreeBSD: stable/10/sys/net/raw_cb.h 225837 2011-09-28 13:48:36Z bz $
  */
 
 #ifndef _NET_RAW_CB_H_

Modified: trunk/sys/net/raw_usrreq.c
===================================================================
--- trunk/sys/net/raw_usrreq.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/raw_usrreq.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)raw_usrreq.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/raw_usrreq.c 225970 2011-10-04 11:35:18Z bz $
+ * $FreeBSD: stable/10/sys/net/raw_usrreq.c 225837 2011-09-28 13:48:36Z bz $
  */
 
 #include <sys/param.h>

Added: trunk/sys/net/rndis.h
===================================================================
--- trunk/sys/net/rndis.h	                        (rev 0)
+++ trunk/sys/net/rndis.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,384 @@
+/* $MidnightBSD$ */
+/*	$FreeBSD: stable/10/sys/net/rndis.h 321760 2017-07-31 06:40:09Z sephe $ */
+/*	$OpenBSD: if_urndisreg.h,v 1.19 2013/11/21 14:08:05 mpi Exp $ */
+
+/*
+ * Copyright (c) 2010 Jonathan Armani <armani at openbsd.org>
+ * Copyright (c) 2010 Fabien Romano <fabien at openbsd.org>
+ * Copyright (c) 2010 Michael Knudsen <mk at openbsd.org>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef	_NET_RNDIS_H_
+#define	_NET_RNDIS_H_
+
+/* Canonical major/minor version as of 22nd Aug. 2016. */
+#define	RNDIS_VERSION_MAJOR		0x00000001
+#define	RNDIS_VERSION_MINOR		0x00000000
+
+#define	RNDIS_STATUS_SUCCESS 		0x00000000L
+#define	RNDIS_STATUS_PENDING 		0x00000103L
+#define	RNDIS_STATUS_MEDIA_CONNECT 	0x4001000BL
+#define	RNDIS_STATUS_MEDIA_DISCONNECT 	0x4001000CL
+#define	RNDIS_STATUS_LINK_SPEED_CHANGE	0x40010013L
+#define	RNDIS_STATUS_NETWORK_CHANGE	0x40010018L
+#define	RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG	0x40020006L
+#define	RNDIS_STATUS_BUFFER_OVERFLOW 	0x80000005L
+#define	RNDIS_STATUS_FAILURE 		0xC0000001L
+#define	RNDIS_STATUS_NOT_SUPPORTED 	0xC00000BBL
+#define	RNDIS_STATUS_RESOURCES 		0xC000009AL
+#define	RNDIS_STATUS_INVALID_DATA 	0xC0010015L
+
+#define	OID_GEN_SUPPORTED_LIST		0x00010101
+#define	OID_GEN_HARDWARE_STATUS		0x00010102
+#define	OID_GEN_MEDIA_SUPPORTED		0x00010103
+#define	OID_GEN_MEDIA_IN_USE		0x00010104
+#define	OID_GEN_MAXIMUM_LOOKAHEAD	0x00010105
+#define	OID_GEN_MAXIMUM_FRAME_SIZE	0x00010106
+#define	OID_GEN_LINK_SPEED		0x00010107
+#define	OID_GEN_TRANSMIT_BUFFER_SPACE	0x00010108
+#define	OID_GEN_RECEIVE_BUFFER_SPACE	0x00010109
+#define	OID_GEN_TRANSMIT_BLOCK_SIZE	0x0001010A
+#define	OID_GEN_RECEIVE_BLOCK_SIZE	0x0001010B
+#define	OID_GEN_VENDOR_ID		0x0001010C
+#define	OID_GEN_VENDOR_DESCRIPTION	0x0001010D
+#define	OID_GEN_CURRENT_PACKET_FILTER	0x0001010E
+#define	OID_GEN_CURRENT_LOOKAHEAD	0x0001010F
+#define	OID_GEN_DRIVER_VERSION		0x00010110
+#define	OID_GEN_MAXIMUM_TOTAL_SIZE	0x00010111
+#define	OID_GEN_PROTOCOL_OPTIONS	0x00010112
+#define	OID_GEN_MAC_OPTIONS		0x00010113
+#define	OID_GEN_MEDIA_CONNECT_STATUS	0x00010114
+#define	OID_GEN_MAXIMUM_SEND_PACKETS	0x00010115
+#define	OID_GEN_VENDOR_DRIVER_VERSION	0x00010116
+#define	OID_GEN_SUPPORTED_GUIDS		0x00010117
+#define	OID_GEN_NETWORK_LAYER_ADDRESSES	0x00010118
+#define	OID_GEN_TRANSPORT_HEADER_OFFSET	0x00010119
+#define	OID_GEN_RECEIVE_SCALE_CAPABILITIES	0x00010203
+#define	OID_GEN_RECEIVE_SCALE_PARAMETERS	0x00010204
+#define	OID_GEN_MACHINE_NAME		0x0001021A
+#define	OID_GEN_RNDIS_CONFIG_PARAMETER	0x0001021B
+#define	OID_GEN_VLAN_ID			0x0001021C
+
+#define	OID_802_3_PERMANENT_ADDRESS	0x01010101
+#define	OID_802_3_CURRENT_ADDRESS	0x01010102
+#define	OID_802_3_MULTICAST_LIST	0x01010103
+#define	OID_802_3_MAXIMUM_LIST_SIZE	0x01010104
+#define	OID_802_3_MAC_OPTIONS		0x01010105
+#define	OID_802_3_RCV_ERROR_ALIGNMENT	0x01020101
+#define	OID_802_3_XMIT_ONE_COLLISION	0x01020102
+#define	OID_802_3_XMIT_MORE_COLLISIONS	0x01020103
+#define	OID_802_3_XMIT_DEFERRED		0x01020201
+#define	OID_802_3_XMIT_MAX_COLLISIONS	0x01020202
+#define	OID_802_3_RCV_OVERRUN		0x01020203
+#define	OID_802_3_XMIT_UNDERRUN		0x01020204
+#define	OID_802_3_XMIT_HEARTBEAT_FAILURE	0x01020205
+#define	OID_802_3_XMIT_TIMES_CRS_LOST	0x01020206
+#define	OID_802_3_XMIT_LATE_COLLISIONS	0x01020207
+
+#define	OID_TCP_OFFLOAD_PARAMETERS	0xFC01020C
+#define	OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES	0xFC01020D
+
+#define	RNDIS_MEDIUM_802_3		0x00000000
+
+/* Device flags */
+#define	RNDIS_DF_CONNECTIONLESS		0x00000001
+#define	RNDIS_DF_CONNECTION_ORIENTED	0x00000002
+
+/*
+ * Common RNDIS message header.
+ */
+struct rndis_msghdr {
+	uint32_t rm_type;
+	uint32_t rm_len;
+};
+
+/*
+ * RNDIS data message
+ */
+#define	REMOTE_NDIS_PACKET_MSG		0x00000001
+
+struct rndis_packet_msg {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_dataoffset;
+	uint32_t rm_datalen;
+	uint32_t rm_oobdataoffset;
+	uint32_t rm_oobdatalen;
+	uint32_t rm_oobdataelements;
+	uint32_t rm_pktinfooffset;
+	uint32_t rm_pktinfolen;
+	uint32_t rm_vchandle;
+	uint32_t rm_reserved;
+};
+
+/*
+ * Minimum value for rm_dataoffset, rm_oobdataoffset, and
+ * rm_pktinfooffset.
+ */
+#define	RNDIS_PACKET_MSG_OFFSET_MIN		\
+	(sizeof(struct rndis_packet_msg) -	\
+	 __offsetof(struct rndis_packet_msg, rm_dataoffset))
+
+/* Offset from the beginning of rndis_packet_msg. */
+#define	RNDIS_PACKET_MSG_OFFSET_ABS(ofs)	\
+	((ofs) + __offsetof(struct rndis_packet_msg, rm_dataoffset))
+
+#define	RNDIS_PACKET_MSG_OFFSET_ALIGN		4
+#define	RNDIS_PACKET_MSG_OFFSET_ALIGNMASK	\
+	(RNDIS_PACKET_MSG_OFFSET_ALIGN - 1)
+
+/* Per-packet-info for RNDIS data message */
+struct rndis_pktinfo {
+	uint32_t rm_size;
+	uint32_t rm_type;		/* NDIS_PKTINFO_TYPE_ */
+	uint32_t rm_pktinfooffset;
+	uint8_t rm_data[];
+};
+
+#define	RNDIS_PKTINFO_OFFSET		\
+	__offsetof(struct rndis_pktinfo, rm_data[0])
+#define	RNDIS_PKTINFO_SIZE_ALIGN	4
+#define	RNDIS_PKTINFO_SIZE_ALIGNMASK	(RNDIS_PKTINFO_SIZE_ALIGN - 1)
+
+#define	NDIS_PKTINFO_TYPE_CSUM		0
+#define	NDIS_PKTINFO_TYPE_IPSEC		1
+#define	NDIS_PKTINFO_TYPE_LSO		2
+#define	NDIS_PKTINFO_TYPE_CLASSIFY	3
+/* reserved 4 */
+#define	NDIS_PKTINFO_TYPE_SGLIST	5
+#define	NDIS_PKTINFO_TYPE_VLAN		6
+#define	NDIS_PKTINFO_TYPE_ORIG		7
+#define	NDIS_PKTINFO_TYPE_PKT_CANCELID	8
+#define	NDIS_PKTINFO_TYPE_ORIG_NBLIST	9
+#define	NDIS_PKTINFO_TYPE_CACHE_NBLIST	10
+#define	NDIS_PKTINFO_TYPE_PKT_PAD	11
+
+/*
+ * RNDIS control messages
+ */
+
+/*
+ * Common header for RNDIS completion messages.
+ *
+ * NOTE: It does not apply to REMOTE_NDIS_RESET_CMPLT.
+ */
+struct rndis_comp_hdr {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_status;
+};
+
+/* Initialize the device. */
+#define	REMOTE_NDIS_INITIALIZE_MSG	0x00000002
+#define	REMOTE_NDIS_INITIALIZE_CMPLT	0x80000002
+
+struct rndis_init_req {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_ver_major;
+	uint32_t rm_ver_minor;
+	uint32_t rm_max_xfersz;
+};
+
+struct rndis_init_comp {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_status;
+	uint32_t rm_ver_major;
+	uint32_t rm_ver_minor;
+	uint32_t rm_devflags;
+	uint32_t rm_medium;
+	uint32_t rm_pktmaxcnt;
+	uint32_t rm_pktmaxsz;
+	uint32_t rm_align;
+	uint32_t rm_aflistoffset;
+	uint32_t rm_aflistsz;
+};
+
+#define	RNDIS_INIT_COMP_SIZE_MIN	\
+	__offsetof(struct rndis_init_comp, rm_aflistsz)
+
+/* Halt the device.  No response sent. */
+#define	REMOTE_NDIS_HALT_MSG		0x00000003
+
+struct rndis_halt_req {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+};
+
+/* Send a query object. */
+#define	REMOTE_NDIS_QUERY_MSG		0x00000004
+#define	REMOTE_NDIS_QUERY_CMPLT		0x80000004
+
+struct rndis_query_req {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_oid;
+	uint32_t rm_infobuflen;
+	uint32_t rm_infobufoffset;
+	uint32_t rm_devicevchdl;
+};
+
+#define	RNDIS_QUERY_REQ_INFOBUFOFFSET		\
+	(sizeof(struct rndis_query_req) -	\
+	 __offsetof(struct rndis_query_req, rm_rid))
+
+struct rndis_query_comp {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_status;
+	uint32_t rm_infobuflen;
+	uint32_t rm_infobufoffset;
+};
+
+/* infobuf offset from the beginning of rndis_query_comp. */
+#define	RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(ofs)	\
+	((ofs) + __offsetof(struct rndis_query_req, rm_rid))
+
+/* Send a set object request. */
+#define	REMOTE_NDIS_SET_MSG		0x00000005
+#define	REMOTE_NDIS_SET_CMPLT		0x80000005
+
+struct rndis_set_req {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_oid;
+	uint32_t rm_infobuflen;
+	uint32_t rm_infobufoffset;
+	uint32_t rm_devicevchdl;
+};
+
+#define	RNDIS_SET_REQ_INFOBUFOFFSET		\
+	(sizeof(struct rndis_set_req) -		\
+	 __offsetof(struct rndis_set_req, rm_rid))
+
+struct rndis_set_comp {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_status;
+};
+
+/*
+ * Parameter used by OID_GEN_RNDIS_CONFIG_PARAMETER.
+ */
+#define	REMOTE_NDIS_SET_PARAM_NUMERIC	0x00000000
+#define	REMOTE_NDIS_SET_PARAM_STRING	0x00000002
+
+struct rndis_set_parameter {
+	uint32_t rm_nameoffset;
+	uint32_t rm_namelen;
+	uint32_t rm_type;
+	uint32_t rm_valueoffset;
+	uint32_t rm_valuelen;
+};
+
+/* Perform a soft reset on the device. */
+#define	REMOTE_NDIS_RESET_MSG		0x00000006
+#define	REMOTE_NDIS_RESET_CMPLT		0x80000006
+
+struct rndis_reset_req {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+};
+
+struct rndis_reset_comp {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_status;
+	uint32_t rm_adrreset;
+};
+
+/* 802.3 link-state or undefined message error.  Sent by device. */
+#define	REMOTE_NDIS_INDICATE_STATUS_MSG	0x00000007
+
+struct rndis_status_msg {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_status;
+	uint32_t rm_stbuflen;
+	uint32_t rm_stbufoffset;
+	/* rndis_diag_info */
+};
+
+/* stbuf offset from the beginning of rndis_status_msg. */
+#define	RNDIS_STBUFOFFSET_ABS(ofs)	\
+	((ofs) + __offsetof(struct rndis_status_msg, rm_status))
+
+/*
+ * Immediately after rndis_status_msg.rm_stbufoffset, if a control
+ * message is malformatted, or a packet message contains inappropriate
+ * content.
+ */
+struct rndis_diag_info {
+	uint32_t rm_diagstatus;
+	uint32_t rm_erroffset;
+};
+
+/* Keepalive message.  May be sent by device. */
+#define	REMOTE_NDIS_KEEPALIVE_MSG	0x00000008
+#define	REMOTE_NDIS_KEEPALIVE_CMPLT	0x80000008
+
+struct rndis_keepalive_req {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+};
+
+struct rndis_keepalive_comp {
+	uint32_t rm_type;
+	uint32_t rm_len;
+	uint32_t rm_rid;
+	uint32_t rm_status;
+};
+
+/* Packet filter bits used by OID_GEN_CURRENT_PACKET_FILTER */
+#define	NDIS_PACKET_TYPE_NONE			0x00000000
+#define	NDIS_PACKET_TYPE_DIRECTED		0x00000001
+#define	NDIS_PACKET_TYPE_MULTICAST		0x00000002
+#define	NDIS_PACKET_TYPE_ALL_MULTICAST		0x00000004
+#define	NDIS_PACKET_TYPE_BROADCAST		0x00000008
+#define	NDIS_PACKET_TYPE_SOURCE_ROUTING		0x00000010
+#define	NDIS_PACKET_TYPE_PROMISCUOUS		0x00000020
+#define	NDIS_PACKET_TYPE_SMT			0x00000040
+#define	NDIS_PACKET_TYPE_ALL_LOCAL		0x00000080
+#define	NDIS_PACKET_TYPE_GROUP			0x00001000
+#define	NDIS_PACKET_TYPE_ALL_FUNCTIONAL		0x00002000
+#define	NDIS_PACKET_TYPE_FUNCTIONAL		0x00004000
+#define	NDIS_PACKET_TYPE_MAC_FRAME		0x00008000
+
+/*
+ * Packet filter description for use with printf(9) %b identifier.
+ */
+#define	NDIS_PACKET_TYPES				\
+	"\20\1DIRECT\2MULTICAST\3ALLMULTI\4BROADCAST"	\
+	"\5SRCROUTE\6PROMISC\7SMT\10ALLLOCAL"		\
+	"\11GROUP\12ALLFUNC\13FUNC\14MACFRAME"
+
+/* RNDIS offsets */
+#define	RNDIS_HEADER_OFFSET	((uint32_t)sizeof(struct rndis_msghdr))
+#define	RNDIS_DATA_OFFSET	\
+    ((uint32_t)(sizeof(struct rndis_packet_msg) - RNDIS_HEADER_OFFSET))
+
+#endif	/* !_NET_RNDIS_H_ */


Property changes on: trunk/sys/net/rndis.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/route.c
===================================================================
--- trunk/sys/net/route.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/route.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
- * $FreeBSD: stable/9/sys/net/route.c 248895 2013-03-29 16:24:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/route.c 267728 2014-06-22 16:36:14Z tuexen $
  */
 /************************************************************************
  * Note: In this file a 'fib' is a "forwarding information base"	*
@@ -38,6 +38,7 @@
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_route.h"
+#include "opt_sctp.h"
 #include "opt_mrouting.h"
 #include "opt_mpath.h"
 
@@ -69,8 +70,7 @@
 
 #include <vm/uma.h>
 
-/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */
-#define	RT_MAXFIBS	16
+#define	RT_MAXFIBS	UINT16_MAX
 
 /* Kernel config default option. */
 #ifdef ROUTETABLES
@@ -87,17 +87,17 @@
 #define	RT_NUMFIBS	1
 #endif
 
+#if defined(INET) || defined(INET6)
+#ifdef SCTP
+extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
+#endif /* SCTP */
+#endif
+
+
+/* This is read-only.. */
 u_int rt_numfibs = RT_NUMFIBS;
 SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
-/*
- * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
- * We can't do more because storage is statically allocated for now.
- * (for compatibility reasons.. this will change. When this changes, code should
- * be refactored to protocol independent parts and protocol dependent parts,
- * probably hanging of domain(9) specific storage to not need the full
- * fib * af RNH allocation etc. but allow tuning the number of tables per
- * address family).
- */
+/* and this can be set too big but will be fixed before it is used */
 TUNABLE_INT("net.fibs", &rt_numfibs);
 
 /*
@@ -126,7 +126,8 @@
 
 
 /* compare two sockaddr structures */
-#define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
+#define	sa_equal(a1, a2) (((a1)->sa_len == (a2)->sa_len) && \
+    (bcmp((a1), (a2), (a1)->sa_len) == 0))
 
 /*
  * Convert a 'struct radix_node *' to a 'struct rtentry *'.
@@ -209,7 +210,49 @@
 }
 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
 
+static int
+rtentry_zinit(void *mem, int size, int how)
+{
+	struct rtentry *rt = mem;
+
+	rt->rt_pksent = counter_u64_alloc(how);
+	if (rt->rt_pksent == NULL)
+		return (ENOMEM);
+
+	RT_LOCK_INIT(rt);
+
+	return (0);
+}
+
 static void
+rtentry_zfini(void *mem, int size)
+{
+	struct rtentry *rt = mem;
+
+	RT_LOCK_DESTROY(rt);
+	counter_u64_free(rt->rt_pksent);
+}
+
+static int
+rtentry_ctor(void *mem, int size, void *arg, int how)
+{
+	struct rtentry *rt = mem;
+
+	bzero(rt, offsetof(struct rtentry, rt_endzero));
+	counter_u64_zero(rt->rt_pksent);
+
+	return (0);
+}
+
+static void
+rtentry_dtor(void *mem, int size, void *arg)
+{
+	struct rtentry *rt = mem;
+
+	RT_UNLOCK_COND(rt);
+}
+
+static void
 vnet_route_init(const void *unused __unused)
 {
 	struct domain *dom;
@@ -220,8 +263,9 @@
 	V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
 	    sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
 
-	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
-	    NULL, NULL, UMA_ALIGN_PTR, 0);
+	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
+	    rtentry_ctor, rtentry_dtor,
+	    rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
 	for (dom = domains; dom; dom = dom->dom_next) {
 		if (dom->dom_rtattach == NULL)
 			continue;
@@ -271,6 +315,9 @@
 			dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset);
 		}
 	}
+
+	free(V_rt_tables, M_RTABLE);
+	uma_zdestroy(V_rtzone);
 }
 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
     vnet_route_uninit, 0);
@@ -495,7 +542,6 @@
 		/*
 		 * and the rtentry itself of course
 		 */
-		RT_LOCK_DESTROY(rt);
 		uma_zfree(V_rtzone, rt);
 		return;
 	}
@@ -544,7 +590,7 @@
 	}
 
 	/* verify the gateway is directly reachable */
-	if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) {
+	if ((ifa = ifa_ifwithnet_fib(gateway, 0, fibnum)) == NULL) {
 		error = ENETUNREACH;
 		goto out;
 	}
@@ -701,7 +747,7 @@
 		 */
 		ifa = NULL;
 		if (flags & RTF_HOST)
-			ifa = ifa_ifwithdstaddr(dst);
+			ifa = ifa_ifwithdstaddr_fib(dst, fibnum);
 		if (ifa == NULL)
 			ifa = ifa_ifwithaddr(gateway);
 	} else {
@@ -710,10 +756,10 @@
 		 * or host, the gateway may still be on the
 		 * other end of a pt to pt link.
 		 */
-		ifa = ifa_ifwithdstaddr(gateway);
+		ifa = ifa_ifwithdstaddr_fib(gateway, fibnum);
 	}
 	if (ifa == NULL)
-		ifa = ifa_ifwithnet(gateway, 0);
+		ifa = ifa_ifwithnet_fib(gateway, 0, fibnum);
 	if (ifa == NULL) {
 		struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
 		if (rt == NULL)
@@ -827,7 +873,7 @@
 	 */
 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
 	    ifpaddr->sa_family == AF_LINK &&
-	    (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) {
+	    (ifa = ifa_ifwithnet_fib(ifpaddr, 0, fibnum)) != NULL) {
 		info->rti_ifp = ifa->ifa_ifp;
 		ifa_free(ifa);
 	}
@@ -943,6 +989,57 @@
 	return (error);
 }
 
+#if 0
+int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
+int rt_print(char *buf, int buflen, struct rtentry *rt);
+
+int
+p_sockaddr(char *buf, int buflen, struct sockaddr *s)
+{
+	void *paddr = NULL;
+
+	switch (s->sa_family) {
+	case AF_INET:
+		paddr = &((struct sockaddr_in *)s)->sin_addr;
+		break;
+	case AF_INET6:
+		paddr = &((struct sockaddr_in6 *)s)->sin6_addr;
+		break;
+	}
+
+	if (paddr == NULL)
+		return (0);
+
+	if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL)
+		return (0);
+	
+	return (strlen(buf));
+}
+
+int
+rt_print(char *buf, int buflen, struct rtentry *rt)
+{
+	struct sockaddr *addr, *mask;
+	int i = 0;
+
+	addr = rt_key(rt);
+	mask = rt_mask(rt);
+
+	i = p_sockaddr(buf, buflen, addr);
+	if (!(rt->rt_flags & RTF_HOST)) {
+		buf[i++] = '/';
+		i += p_sockaddr(buf + i, buflen - i, mask);
+	}
+
+	if (rt->rt_flags & RTF_GATEWAY) {
+		buf[i++] = '>';
+		i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway);
+	}
+
+	return (i);
+}
+#endif
+
 #ifdef RADIX_MPATH
 static int
 rn_mpath_update(int req, struct rt_addrinfo *info,
@@ -956,10 +1053,11 @@
 	register struct radix_node *rn;
 	int error = 0;
 
-	rn = rnh->rnh_matchaddr(dst, rnh);
+	rn = rnh->rnh_lookup(dst, netmask, rnh);
 	if (rn == NULL)
 		return (ESRCH);
 	rto = rt = RNTORT(rn);
+
 	rt = rt_mpath_matchgate(rt, gateway);
 	if (rt == NULL)
 		return (ESRCH);
@@ -1179,13 +1277,11 @@
 		} else
 			ifa_ref(info->rti_ifa);
 		ifa = info->rti_ifa;
-		rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
+		rt = uma_zalloc(V_rtzone, M_NOWAIT);
 		if (rt == NULL) {
-			if (ifa != NULL)
-				ifa_free(ifa);
+			ifa_free(ifa);
 			senderr(ENOBUFS);
 		}
-		RT_LOCK_INIT(rt);
 		rt->rt_flags = RTF_UP | flags;
 		rt->rt_fibnum = fibnum;
 		/*
@@ -1193,9 +1289,7 @@
 		 */
 		RT_LOCK(rt);
 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
-			RT_LOCK_DESTROY(rt);
-			if (ifa != NULL)
-				ifa_free(ifa);
+			ifa_free(ifa);
 			uma_zfree(V_rtzone, rt);
 			senderr(error);
 		}
@@ -1220,17 +1314,14 @@
 		 */
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
-		rt->rt_rmx.rmx_weight = 1;
+		rt->rt_weight = 1;
 
 #ifdef RADIX_MPATH
 		/* do not permit exactly the same dst/mask/gw pair */
 		if (rn_mpath_capable(rnh) &&
 			rt_mpath_conflict(rnh, rt, netmask)) {
-			if (rt->rt_ifa) {
-				ifa_free(rt->rt_ifa);
-			}
+			ifa_free(rt->rt_ifa);
 			Free(rt_key(rt));
-			RT_LOCK_DESTROY(rt);
 			uma_zfree(V_rtzone, rt);
 			senderr(EEXIST);
 		}
@@ -1296,10 +1387,8 @@
 		 * then un-make it (this should be a function)
 		 */
 		if (rn == NULL) {
-			if (rt->rt_ifa)
-				ifa_free(rt->rt_ifa);
+			ifa_free(rt->rt_ifa);
 			Free(rt_key(rt));
-			RT_LOCK_DESTROY(rt);
 			uma_zfree(V_rtzone, rt);
 #ifdef FLOWTABLE
 			if (rt0 != NULL)
@@ -1309,18 +1398,7 @@
 		} 
 #ifdef FLOWTABLE
 		else if (rt0 != NULL) {
-			switch (dst->sa_family) {
-#ifdef INET6
-			case AF_INET6:
-				flowtable_route_flush(V_ip6_ft, rt0);
-				break;
-#endif
-#ifdef INET
-			case AF_INET:
-				flowtable_route_flush(V_ip_ft, rt0);
-				break;
-#endif
-			}
+			flowtable_route_flush(dst->sa_family, rt0);
 			RTFREE(rt0);
 		}
 #endif
@@ -1464,9 +1542,9 @@
 		fibnum = RT_DEFAULT_FIB;
 		break;
 	}
-	if (fibnum == -1) {
+	if (fibnum == RT_ALL_FIBS) {
 		if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
-			startfib = endfib = curthread->td_proc->p_fibnum;
+			startfib = endfib = ifa->ifa_ifp->if_fib;
 		} else {
 			startfib = 0;
 			endfib = rt_numfibs - 1;
@@ -1513,10 +1591,10 @@
 				/* this table doesn't exist but others might */
 				continue;
 			RADIX_NODE_HEAD_RLOCK(rnh);
+			rn = rnh->rnh_lookup(dst, netmask, rnh);
 #ifdef RADIX_MPATH
 			if (rn_mpath_capable(rnh)) {
 
-				rn = rnh->rnh_matchaddr(dst, rnh);
 				if (rn == NULL) 
 					error = ESRCH;
 				else {
@@ -1530,17 +1608,14 @@
 					 */
 					rt = rt_mpath_matchgate(rt,
 					    ifa->ifa_addr);
-					if (!rt) 
+					if (rt == NULL) 
 						error = ESRCH;
 				}
 			}
-			else
 #endif
-			rn = rnh->rnh_lookup(dst, netmask, rnh);
 			error = (rn == NULL ||
 			    (rn->rn_flags & RNF_ROOT) ||
-			    RNTORT(rn)->rt_ifa != ifa ||
-			    !sa_equal((struct sockaddr *)rn->rn_key, dst));
+			    RNTORT(rn)->rt_ifa != ifa);
 			RADIX_NODE_HEAD_RUNLOCK(rnh);
 			if (error) {
 				/* this is only an error if bad on ALL tables */
@@ -1580,7 +1655,7 @@
 			info.rti_ifa = NULL;
 			info.rti_flags = RTF_RNH_LOCKED;
 
-			error = rtrequest1_fib(RTM_DELETE, &info, &rt, fibnum);
+			error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
 			if (error == 0) {
 				info.rti_ifa = ifa;
 				info.rti_flags = flags | RTF_RNH_LOCKED |
@@ -1666,15 +1741,6 @@
 	return (error);
 }
 
-#ifndef BURN_BRIDGES
-/* special one for inet internal use. may not use. */
-int
-rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
-{
-	return (rtinit1(ifa, cmd, flags, -1));
-}
-#endif
-
 /*
  * Set up a routing table entry, normally
  * for an interface.
@@ -1695,8 +1761,94 @@
 	case AF_INET6:
 	case AF_INET:
 		/* We do support multiple FIBs. */
-		fib = -1;
+		fib = RT_ALL_FIBS;
 		break;
 	}
 	return (rtinit1(ifa, cmd, flags, fib));
 }
+
+/*
+ * Announce interface address arrival/withdraw
+ * Returns 0 on success.
+ */
+int
+rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
+{
+
+	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
+	    ("unexpected cmd %d", cmd));
+	
+	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
+	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
+
+#if defined(INET) || defined(INET6)
+#ifdef SCTP
+	/*
+	 * notify the SCTP stack
+	 * this will only get called when an address is added/deleted
+	 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
+	 */
+	sctp_addr_change(ifa, cmd);
+#endif /* SCTP */
+#endif
+	return (rtsock_addrmsg(cmd, ifa, fibnum));
+}
+
+/*
+ * Announce route addition/removal.
+ * Users of this function MUST validate input data BEFORE calling.
+ * However we have to be able to handle invalid data:
+ * if some userland app sends us "invalid" route message (invalid mask,
+ * no dst, wrong address families, etc...) we need to pass it back
+ * to app (and any other rtsock consumers) with rtm_errno field set to
+ * non-zero value.
+ * Returns 0 on success.
+ */
+int
+rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
+    int fibnum)
+{
+
+	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
+	    ("unexpected cmd %d", cmd));
+	
+	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
+	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
+
+	KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__));
+
+	return (rtsock_routemsg(cmd, ifp, error, rt, fibnum));
+}
+
+void
+rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+{
+
+	rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS);
+}
+
+/*
+ * This is called to generate messages from the routing socket
+ * indicating a network interface has had addresses associated with it.
+ */
+void
+rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
+    int fibnum)
+{
+
+	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
+		("unexpected cmd %u", cmd));
+	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
+	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
+
+	if (cmd == RTM_ADD) {
+		rt_addrmsg(cmd, ifa, fibnum);
+		if (rt != NULL)
+			rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum);
+	} else {
+		if (rt != NULL)
+			rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum);
+		rt_addrmsg(cmd, ifa, fibnum);
+	}
+}
+

Modified: trunk/sys/net/route.h
===================================================================
--- trunk/sys/net/route.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/route.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,12 +28,14 @@
  * SUCH DAMAGE.
  *
  *	@(#)route.h	8.4 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/route.h 248895 2013-03-29 16:24:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/route.h 265717 2014-05-08 21:03:31Z melifaro $
  */
 
 #ifndef _NET_ROUTE_H_
 #define _NET_ROUTE_H_
 
+#include <sys/counter.h>
+
 /*
  * Kernel resident routing tables.
  *
@@ -58,17 +60,6 @@
 #define	RT_CACHING_CONTEXT	0x1	/* XXX: not used anywhere */
 #define	RT_NORTREF		0x2	/* doesn't hold reference on ro_rt */
 
-/*
- * These numbers are used by reliable protocols for determining
- * retransmission behavior and are included in the routing structure.
- */
-struct rt_metrics_lite {
-	u_long	rmx_mtu;	/* MTU for this path */
-	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
-	u_long	rmx_pksent;	/* packets sent using this route */
-	u_long	rmx_weight;	/* absolute weight */ 
-};
-
 struct rt_metrics {
 	u_long	rmx_locks;	/* Kernel must leave these values alone */
 	u_long	rmx_mtu;	/* MTU for this path */
@@ -93,11 +84,9 @@
 #define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))
 
 #define	RT_DEFAULT_FIB	0	/* Explicitly mark fib=0 restricted cases */
-extern u_int rt_numfibs;	/* number fo usable routing tables */
-/*
- * XXX kernel function pointer `rt_output' is visible to applications.
- */
-struct mbuf;
+#define	RT_ALL_FIBS	-1	/* Announce event for every fib */
+extern u_int rt_numfibs;	/* number of usable routing tables */
+extern u_int rt_add_addr_allfibs;	/* Announce interfaces to all fibs */
 
 /*
  * We distinguish between routes to hosts and routes to networks,
@@ -113,6 +102,8 @@
 #include <net/radix_mpath.h>
 #endif
 #endif
+
+#if defined(_KERNEL) || defined(_WANT_RTENTRY)
 struct rtentry {
 	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
 	/*
@@ -123,34 +114,20 @@
 #define	rt_key(r)	(*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
 #define	rt_mask(r)	(*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
 	struct	sockaddr *rt_gateway;	/* value */
-	int	rt_flags;		/* up/down?, host/net */
-	int	rt_refcnt;		/* # held references */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
 	struct	ifaddr *rt_ifa;		/* the answer: interface address to use */
-	struct	rt_metrics_lite rt_rmx;	/* metrics used by rx'ing protocols */
-	u_int	rt_fibnum;		/* which FIB */
-#ifdef _KERNEL
-	/* XXX ugly, user apps use this definition but don't have a mtx def */
-	struct	mtx rt_mtx;		/* mutex for routing entry */
-#endif
+	int		rt_flags;	/* up/down?, host/net */
+	int		rt_refcnt;	/* # held references */
+	u_int		rt_fibnum;	/* which FIB */
+	u_long		rt_mtu;		/* MTU for this path */
+	u_long		rt_weight;	/* absolute weight */ 
+	u_long		rt_expire;	/* lifetime for route, e.g. redirect */
+#define	rt_endzero	rt_pksent
+	counter_u64_t	rt_pksent;	/* packets sent using this route */
+	struct mtx	rt_mtx;		/* mutex for routing entry */
 };
+#endif /* _KERNEL || _WANT_RTENTRY */
 
-/*
- * Following structure necessary for 4.3 compatibility;
- * We should eventually move it to a compat file.
- */
-struct ortentry {
-	u_long	rt_hash;		/* to speed lookups */
-	struct	sockaddr rt_dst;	/* key */
-	struct	sockaddr rt_gateway;	/* value */
-	short	rt_flags;		/* up/down?, host/net */
-	short	rt_refcnt;		/* # held references */
-	u_long	rt_use;			/* raw # packets forwarded */
-	struct	ifnet *rt_ifp;		/* the answer: interface to use */
-};
-
-#define rt_use rt_rmx.rmx_pksent
-
 #define	RTF_UP		0x1		/* route usable */
 #define	RTF_GATEWAY	0x2		/* destination is a gateway */
 #define	RTF_HOST	0x4		/* host entry (net otherwise) */
@@ -168,12 +145,7 @@
 #define RTF_BLACKHOLE	0x1000		/* just discard pkts (during updates) */
 #define RTF_PROTO2	0x4000		/* protocol specific routing flag */
 #define RTF_PROTO1	0x8000		/* protocol specific routing flag */
-
-/* XXX: temporary to stay API/ABI compatible with userland */
-#ifndef _KERNEL
-#define RTF_PRCLONING	0x10000		/* unused, for compatibility */
-#endif
-
+/*			0x10000		   unused, was RTF_PRCLONING */
 /*			0x20000		   unused, was RTF_WASCLONED */
 #define RTF_PROTO3	0x40000		/* protocol specific routing flag */
 /*			0x80000		   unused */
@@ -186,6 +158,9 @@
 
 #define	RTF_RNH_LOCKED	 0x40000000	/* radix node head is locked */
 
+#define	RTF_GWFLAG_COMPAT 0x80000000	/* a compatibility bit for interacting
+					   with existing routing apps */
+
 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
 #define RTF_FMASK	\
 	(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
@@ -232,8 +207,8 @@
 #define RTM_REDIRECT	0x6	/* Told to use different route */
 #define RTM_MISS	0x7	/* Lookup failed on this address */
 #define RTM_LOCK	0x8	/* fix specified metrics */
-#define RTM_OLDADD	0x9	/* caused by SIOCADDRT */
-#define RTM_OLDDEL	0xa	/* caused by SIOCDELRT */
+		    /*	0x9  */
+		    /*	0xa  */
 #define RTM_RESOLVE	0xb	/* req to resolve dst to LL addr */
 #define RTM_NEWADDR	0xc	/* address being added to iface */
 #define RTM_DELADDR	0xd	/* address being removed from iface */
@@ -312,6 +287,10 @@
 #define	RT_UNLOCK(_rt)		mtx_unlock(&(_rt)->rt_mtx)
 #define	RT_LOCK_DESTROY(_rt)	mtx_destroy(&(_rt)->rt_mtx)
 #define	RT_LOCK_ASSERT(_rt)	mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
+#define	RT_UNLOCK_COND(_rt)	do {				\
+	if (mtx_owned(&(_rt)->rt_mtx))				\
+		mtx_unlock(&(_rt)->rt_mtx);			\
+} while (0)
 
 #define	RT_ADDREF(_rt)	do {					\
 	RT_LOCK_ASSERT(_rt);					\
@@ -366,10 +345,15 @@
 void	 rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
 void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
 void	 rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int);
+int	 rt_addrmsg(int, struct ifaddr *, int);
+int	 rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
 int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 void 	 rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
 
+int	rtsock_addrmsg(int, struct ifaddr *, int);
+int	rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
+
 /*
  * Note the following locking behavior:
  *
@@ -399,11 +383,6 @@
 int	 rtrequest(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **);
 
-#ifndef BURN_BRIDGES
-/* defaults to "all" FIBs */
-int	 rtinit_fib(struct ifaddr *, int, int);
-#endif
-
 /* XXX MRT NEW VERSIONS THAT USE FIBs
  * For now the protocol indepedent versions are the same as the AF_INET ones
  * but this will change.. 

Modified: trunk/sys/net/rtsock.c
===================================================================
--- trunk/sys/net/rtsock.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/rtsock.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,10 +28,9 @@
  * SUCH DAMAGE.
  *
  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
- * $FreeBSD: stable/9/sys/net/rtsock.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/rtsock.c 302233 2016-06-27 21:44:27Z bdrewery $
  */
 #include "opt_compat.h"
-#include "opt_sctp.h"
 #include "opt_mpath.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -53,6 +52,7 @@
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
+#define	_IN_NET_RTSOCK_C
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
@@ -64,16 +64,12 @@
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
+#include <netinet/ip_carp.h>
 #ifdef INET6
+#include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 
-#if defined(INET) || defined(INET6)
-#ifdef SCTP
-extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
-#endif /* SCTP */
-#endif
-
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <compat/freebsd32/freebsd32.h>
@@ -84,8 +80,8 @@
 	uint8_t	ifi_addrlen;
 	uint8_t	ifi_hdrlen;
 	uint8_t	ifi_link_state;
-	uint8_t	ifi_spare_char1;
-	uint8_t	ifi_spare_char2;
+	uint8_t	ifi_vhid;
+	uint8_t	ifi_baudrate_pf;
 	uint8_t	ifi_datalen;
 	uint32_t ifi_mtu;
 	uint32_t ifi_metric;
@@ -104,6 +100,7 @@
 	uint32_t ifi_hwassist;
 	int32_t	ifi_epoch;
 	struct	timeval32 ifi_lastchange;
+	uint32_t ifi_oqdrops;
 };
 
 struct if_msghdr32 {
@@ -150,12 +147,14 @@
 static struct	sockaddr route_src = { 2, PF_ROUTE, };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
 
+/* These are external hooks for CARP. */
+int	(*carp_get_vhid_p)(struct ifaddr *);
+
 /*
  * Used by rtsock/raw_input callback code to decide whether to filter the update
  * notification to a socket bound to a particular FIB.
  */
 #define	RTS_FILTER_FIB	M_PROTO8
-#define	RTS_ALLFIBS	-1
 
 static struct {
 	int	ip_count;	/* attached w/ AF_INET */
@@ -190,10 +189,8 @@
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	sysctl_ifmalist(int af, struct walkarg *w);
 static int	route_output(struct mbuf *m, struct socket *so);
-static void	rt_setmetrics(u_long which, const struct rt_metrics *in,
-			struct rt_metrics_lite *out);
-static void	rt_getmetrics(const struct rt_metrics_lite *in,
-			struct rt_metrics *out);
+static void	rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt);
+static void	rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
 static void	rt_dispatch(struct mbuf *, sa_family_t);
 
 static struct netisr_handler rtsock_nh = {
@@ -299,29 +296,18 @@
 rts_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct rawcb *rp;
-	int s, error;
+	int error;
 
 	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
 
 	/* XXX */
 	rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
-	if (rp == NULL)
-		return ENOBUFS;
 
-	/*
-	 * The splnet() is necessary to block protocols from sending
-	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
-	 * this PCB is extant but incompletely initialized.
-	 * Probably we should try to do more of this work beforehand and
-	 * eliminate the spl.
-	 */
-	s = splnet();
 	so->so_pcb = (caddr_t)rp;
 	so->so_fibnum = td->td_proc->p_fibnum;
 	error = raw_attach(so, proto);
 	rp = sotorawcb(so);
 	if (error) {
-		splx(s);
 		so->so_pcb = NULL;
 		free(rp, M_PCB);
 		return error;
@@ -342,7 +328,6 @@
 	RTSOCK_UNLOCK();
 	soisconnected(so);
 	so->so_options |= SO_USELOOPBACK;
-	splx(s);
 	return 0;
 }
 
@@ -570,6 +555,11 @@
 	struct rtentry *rt = NULL;
 	struct radix_node_head *rnh;
 	struct rt_addrinfo info;
+#ifdef INET6
+	struct sockaddr_storage ss;
+	struct sockaddr_in6 *sin6;
+	int i, rti_need_deembed = 0;
+#endif
 	int len, error = 0;
 	struct ifnet *ifp = NULL;
 	union sockaddr_union saun;
@@ -600,6 +590,11 @@
 	rtm->rtm_pid = curproc->p_pid;
 	bzero(&info, sizeof(info));
 	info.rti_addrs = rtm->rtm_addrs;
+	/*
+	 * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
+	 * link-local address because rtrequest requires addresses with
+	 * embedded scope id.
+	 */
 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(EINVAL);
@@ -648,8 +643,10 @@
 		 */
 		if (gw_ro.ro_rt != NULL &&
 		    gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK &&
-		    gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
+		    gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) {
 			info.rti_flags &= ~RTF_GATEWAY;
+			info.rti_flags |= RTF_GWFLAG_COMPAT;
+		}
 		if (gw_ro.ro_rt != NULL)
 			RTFREE(gw_ro.ro_rt);
 	}
@@ -666,14 +663,20 @@
 		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
 		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
 			error = lla_rt_output(rtm, &info);
+#ifdef INET6
+			if (error == 0)
+				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
 			break;
 		}
 		error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
 		    so->so_fibnum);
 		if (error == 0 && saved_nrt) {
+#ifdef INET6
+			rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
 			RT_LOCK(saved_nrt);
-			rt_setmetrics(rtm->rtm_inits,
-				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
+			rt_setmetrics(rtm, saved_nrt);
 			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
 			RT_REMREF(saved_nrt);
 			RT_UNLOCK(saved_nrt);
@@ -687,6 +690,10 @@
 		    (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
 		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
 			error = lla_rt_output(rtm, &info);
+#ifdef INET6
+			if (error == 0)
+				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
 			break;
 		}
 		error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
@@ -696,6 +703,10 @@
 			rt = saved_nrt;
 			goto report;
 		}
+#ifdef INET6
+		/* rt_msg2() will not be used when RTM_DELETE fails. */
+		rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
 		break;
 
 	case RTM_GET:
@@ -705,10 +716,24 @@
 		    info.rti_info[RTAX_DST]->sa_family);
 		if (rnh == NULL)
 			senderr(EAFNOSUPPORT);
+
 		RADIX_NODE_HEAD_RLOCK(rnh);
-		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
-			info.rti_info[RTAX_NETMASK], rnh);
-		if (rt == NULL) {	/* XXX looks bogus */
+
+		if (info.rti_info[RTAX_NETMASK] == NULL &&
+		    rtm->rtm_type == RTM_GET) {
+			/*
+			 * Provide logest prefix match for
+			 * address lookup (no mask).
+			 * 'route -n get addr'
+			 */
+			rt = (struct rtentry *) rnh->rnh_matchaddr(
+			    info.rti_info[RTAX_DST], rnh);
+		} else
+			rt = (struct rtentry *) rnh->rnh_lookup(
+			    info.rti_info[RTAX_DST],
+			    info.rti_info[RTAX_NETMASK], rnh);
+
+		if (rt == NULL) {
 			RADIX_NODE_HEAD_RUNLOCK(rnh);
 			senderr(ESRCH);
 		}
@@ -765,25 +790,6 @@
 		RT_ADDREF(rt);
 		RADIX_NODE_HEAD_RUNLOCK(rnh);
 
-		/* 
-		 * Fix for PR: 82974
-		 *
-		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
-		 * returns a perfect match in case a netmask is
-		 * specified.  For host routes only a longest prefix
-		 * match is returned so it is necessary to compare the
-		 * existence of the netmask.  If both have a netmask
-		 * rnh_lookup() did a perfect match and if none of them
-		 * have a netmask both are host routes which is also a
-		 * perfect match.
-		 */
-
-		if (rtm->rtm_type != RTM_GET && 
-		    (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
-			RT_UNLOCK(rt);
-			senderr(ESRCH);
-		}
-
 		switch(rtm->rtm_type) {
 
 		case RTM_GET:
@@ -834,8 +840,12 @@
 				Free(rtm); rtm = new_rtm;
 			}
 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
-			rtm->rtm_flags = rt->rt_flags;
-			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+			if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+				rtm->rtm_flags = RTF_GATEWAY | 
+					(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+			else
+				rtm->rtm_flags = rt->rt_flags;
+			rt_getmetrics(rt, &rtm->rtm_rmx);
 			rtm->rtm_addrs = info.rti_addrs;
 			break;
 
@@ -886,6 +896,7 @@
 					RT_UNLOCK(rt);
 					senderr(error);
 				}
+				rt->rt_flags &= ~RTF_GATEWAY;
 				rt->rt_flags |= (RTF_GATEWAY & info.rti_flags);
 			}
 			if (info.rti_ifa != NULL &&
@@ -897,8 +908,7 @@
 			/* Allow some flags to be toggled on change. */
 			rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
 				    (rtm->rtm_flags & RTF_FMASK);
-			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
-					&rt->rt_rmx);
+			rt_setmetrics(rtm, rt);
 			rtm->rtm_index = rt->rt_ifp->if_index;
 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
@@ -939,6 +949,22 @@
 		rp = sotorawcb(so);
 	}
 	if (rtm) {
+#ifdef INET6
+		if (rti_need_deembed) {
+			/* sin6_scope_id is recovered before sending rtm. */
+			sin6 = (struct sockaddr_in6 *)&ss;
+			for (i = 0; i < RTAX_MAX; i++) {
+				if (info.rti_info[i] == NULL)
+					continue;
+				if (info.rti_info[i]->sa_family != AF_INET6)
+					continue;
+				bcopy(info.rti_info[i], sin6, sizeof(*sin6));
+				if (sa6_recoverscope(sin6) == 0)
+					bcopy(sin6, info.rti_info[i],
+						    sizeof(*sin6));
+			}
+		}
+#endif
 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
 			m_freem(m);
@@ -970,34 +996,30 @@
 }
 
 static void
-rt_setmetrics(u_long which, const struct rt_metrics *in,
-	struct rt_metrics_lite *out)
+rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt)
 {
-#define metric(f, e) if (which & (f)) out->e = in->e;
-	/*
-	 * Only these are stored in the routing entry since introduction
-	 * of tcp hostcache. The rest is ignored.
-	 */
-	metric(RTV_MTU, rmx_mtu);
-	metric(RTV_WEIGHT, rmx_weight);
-	/* Userland -> kernel timebase conversion. */
-	if (which & RTV_EXPIRE)
-		out->rmx_expire = in->rmx_expire ?
-		    in->rmx_expire - time_second + time_uptime : 0;
-#undef metric
+
+	if (rtm->rtm_inits & RTV_MTU)
+		rt->rt_mtu = rtm->rtm_rmx.rmx_mtu;
+	if (rtm->rtm_inits & RTV_WEIGHT)
+		rt->rt_weight = rtm->rtm_rmx.rmx_weight;
+	/* Userland -> kernel timebase conversion. */
+	if (rtm->rtm_inits & RTV_EXPIRE)
+		rt->rt_expire = rtm->rtm_rmx.rmx_expire ?
+		    rtm->rtm_rmx.rmx_expire - time_second + time_uptime : 0;
 }
 
 static void
-rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
+rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
 {
-#define metric(e) out->e = in->e;
+
 	bzero(out, sizeof(*out));
-	metric(rmx_mtu);
-	metric(rmx_weight);
+	out->rmx_mtu = rt->rt_mtu;
+	out->rmx_weight = rt->rt_weight;
+	out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
 	/* Kernel -> userland timebase conversion. */
-	out->rmx_expire = in->rmx_expire ?
-	    in->rmx_expire - time_uptime + time_second : 0;
-#undef metric
+	out->rmx_expire = rt->rt_expire ?
+	    rt->rt_expire - time_uptime + time_second : 0;
 }
 
 /*
@@ -1032,6 +1054,11 @@
 			return (0); /* should be EINVAL but for compat */
 		}
 		/* accept it */
+#ifdef INET6
+		if (sa->sa_family == AF_INET6)
+			sa6_embedscope((struct sockaddr_in6 *)sa,
+			    V_ip6_use_defzone);
+#endif
 		rtinfo->rti_info[i] = sa;
 		cp += SA_SIZE(sa);
 	}
@@ -1048,6 +1075,10 @@
 	struct mbuf *m;
 	int i;
 	struct sockaddr *sa;
+#ifdef INET6
+	struct sockaddr_storage ss;
+	struct sockaddr_in6 *sin6;
+#endif
 	int len, dlen;
 
 	switch (type) {
@@ -1074,20 +1105,17 @@
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
-	if (len > MCLBYTES)
-		panic("rt_msg1");
-	m = m_gethdr(M_DONTWAIT, MT_DATA);
-	if (m && len > MHLEN) {
-		MCLGET(m, M_DONTWAIT);
-		if ((m->m_flags & M_EXT) == 0) {
-			m_free(m);
-			m = NULL;
-		}
-	}
+
+	/* XXXGL: can we use MJUMPAGESIZE cluster here? */
+	KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
+	if (len > MHLEN)
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+	else
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (m);
+
 	m->m_pkthdr.len = m->m_len = len;
-	m->m_pkthdr.rcvif = NULL;
 	rtm = mtod(m, struct rt_msghdr *);
 	bzero((caddr_t)rtm, len);
 	for (i = 0; i < RTAX_MAX; i++) {
@@ -1095,6 +1123,14 @@
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = SA_SIZE(sa);
+#ifdef INET6
+		if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+			sin6 = (struct sockaddr_in6 *)&ss;
+			bcopy(sa, sin6, sizeof(*sin6));
+			if (sa6_recoverscope(sin6) == 0)
+				sa = (struct sockaddr *)sin6;
+		}
+#endif
 		m_copyback(m, len, dlen, (caddr_t)sa);
 		len += dlen;
 	}
@@ -1117,6 +1153,10 @@
 	int i;
 	int len, dlen, second_time = 0;
 	caddr_t cp0;
+#ifdef INET6
+	struct sockaddr_storage ss;
+	struct sockaddr_in6 *sin6;
+#endif
 
 	rtinfo->rti_addrs = 0;
 again:
@@ -1169,6 +1209,14 @@
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = SA_SIZE(sa);
 		if (cp) {
+#ifdef INET6
+			if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+				sin6 = (struct sockaddr_in6 *)&ss;
+				bcopy(sa, sin6, sizeof(*sin6));
+				if (sa6_recoverscope(sin6) == 0)
+					sa = (struct sockaddr *)sin6;
+			}
+#endif
 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
 			cp += dlen;
 		}
@@ -1224,7 +1272,7 @@
 	if (m == NULL)
 		return;
 
-	if (fibnum != RTS_ALLFIBS) {
+	if (fibnum != RT_ALL_FIBS) {
 		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
 		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
 		M_SETFIB(m, fibnum);
@@ -1242,7 +1290,7 @@
 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 {
 
-	rt_missmsg_fib(type, rtinfo, flags, error, RTS_ALLFIBS);
+	rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
 }
 
 /*
@@ -1271,89 +1319,92 @@
 }
 
 /*
- * This is called to generate messages from the routing socket
- * indicating a network interface has had addresses associated with it.
- * if we ever reverse the logic and replace messages TO the routing
- * socket indicate a request to configure interfaces, then it will
- * be unnecessary as the routing socket will automatically generate
- * copies of it.
+ * Announce interface address arrival/withdraw.
+ * Please do not call directly, use rt_addrmsg().
+ * Assume input data to be valid.
+ * Returns 0 on success.
  */
-void
-rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
-    int fibnum)
+int
+rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
 {
 	struct rt_addrinfo info;
-	struct sockaddr *sa = NULL;
-	int pass;
-	struct mbuf *m = NULL;
+	struct sockaddr *sa;
+	int ncmd;
+	struct mbuf *m;
+	struct ifa_msghdr *ifam;
 	struct ifnet *ifp = ifa->ifa_ifp;
 
-	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
-		("unexpected cmd %u", cmd));
-#if defined(INET) || defined(INET6)
-#ifdef SCTP
-	/*
-	 * notify the SCTP stack
-	 * this will only get called when an address is added/deleted
-	 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
-	 */
-	sctp_addr_change(ifa, cmd);
-#endif /* SCTP */
-#endif
 	if (route_cb.any_count == 0)
-		return;
-	for (pass = 1; pass < 3; pass++) {
-		bzero((caddr_t)&info, sizeof(info));
-		if ((cmd == RTM_ADD && pass == 1) ||
-		    (cmd == RTM_DELETE && pass == 2)) {
-			struct ifa_msghdr *ifam;
-			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
+		return (0);
 
-			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
-			info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
-			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
-			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
-			if ((m = rt_msg1(ncmd, &info)) == NULL)
-				continue;
-			ifam = mtod(m, struct ifa_msghdr *);
-			ifam->ifam_index = ifp->if_index;
-			ifam->ifam_metric = ifa->ifa_metric;
-			ifam->ifam_flags = ifa->ifa_flags;
-			ifam->ifam_addrs = info.rti_addrs;
-		}
-		if ((cmd == RTM_ADD && pass == 2) ||
-		    (cmd == RTM_DELETE && pass == 1)) {
-			struct rt_msghdr *rtm;
+	ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 
-			if (rt == NULL)
-				continue;
-			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-			info.rti_info[RTAX_DST] = sa = rt_key(rt);
-			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-			if ((m = rt_msg1(cmd, &info)) == NULL)
-				continue;
-			rtm = mtod(m, struct rt_msghdr *);
-			rtm->rtm_index = ifp->if_index;
-			rtm->rtm_flags |= rt->rt_flags;
-			rtm->rtm_errno = error;
-			rtm->rtm_addrs = info.rti_addrs;
-		}
-		if (fibnum != RTS_ALLFIBS) {
-			KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: "
-			    "fibnum out of range 0 <= %d < %d", __func__,
-			     fibnum, rt_numfibs));
-			M_SETFIB(m, fibnum);
-			m->m_flags |= RTS_FILTER_FIB;
-		}
-		rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+	bzero((caddr_t)&info, sizeof(info));
+	info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
+	info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
+	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
+	if ((m = rt_msg1(ncmd, &info)) == NULL)
+		return (ENOBUFS);
+	ifam = mtod(m, struct ifa_msghdr *);
+	ifam->ifam_index = ifp->if_index;
+	ifam->ifam_metric = ifa->ifa_metric;
+	ifam->ifam_flags = ifa->ifa_flags;
+	ifam->ifam_addrs = info.rti_addrs;
+
+	if (fibnum != RT_ALL_FIBS) {
+		M_SETFIB(m, fibnum);
+		m->m_flags |= RTS_FILTER_FIB;
 	}
+
+	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+
+	return (0);
 }
 
-void
-rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+/*
+ * Announce route addition/removal.
+ * Please do not call directly, use rt_routemsg().
+ * Note that @rt data MAY be inconsistent/invalid:
+ * if some userland app sends us "invalid" route message (invalid mask,
+ * no dst, wrong address families, etc...) we need to pass it back
+ * to app (and any other rtsock consumers) with rtm_errno field set to
+ * non-zero value.
+ *
+ * Returns 0 on success.
+ */
+int
+rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
+    int fibnum)
 {
+	struct rt_addrinfo info;
+	struct sockaddr *sa;
+	struct mbuf *m;
+	struct rt_msghdr *rtm;
 
-	rt_newaddrmsg_fib(cmd, ifa, error, rt, RTS_ALLFIBS);
+	if (route_cb.any_count == 0)
+		return (0);
+
+	bzero((caddr_t)&info, sizeof(info));
+	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+	info.rti_info[RTAX_DST] = sa = rt_key(rt);
+	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+	if ((m = rt_msg1(cmd, &info)) == NULL)
+		return (ENOBUFS);
+	rtm = mtod(m, struct rt_msghdr *);
+	rtm->rtm_index = ifp->if_index;
+	rtm->rtm_flags |= rt->rt_flags;
+	rtm->rtm_errno = error;
+	rtm->rtm_addrs = info.rti_addrs;
+
+	if (fibnum != RT_ALL_FIBS) {
+		M_SETFIB(m, fibnum);
+		m->m_flags |= RTS_FILTER_FIB;
+	}
+
+	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+
+	return (0);
 }
 
 /*
@@ -1530,12 +1581,12 @@
 	if (w->w_req && w->w_tmem) {
 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
-		rtm->rtm_flags = rt->rt_flags;
-		/*
-		 * let's be honest about this being a retarded hack
-		 */
-		rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
-		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+		if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+			rtm->rtm_flags = RTF_GATEWAY | 
+				(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+		else
+			rtm->rtm_flags = rt->rt_flags;
+		rt_getmetrics(rt, &rtm->rtm_rmx);
 		rtm->rtm_index = rt->rt_ifp->if_index;
 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
 		rtm->rtm_addrs = info.rti_addrs;
@@ -1556,6 +1607,8 @@
 	CP(*src, *dst, ifi_addrlen);
 	CP(*src, *dst, ifi_hdrlen);
 	CP(*src, *dst, ifi_link_state);
+	CP(*src, *dst, ifi_vhid);
+	CP(*src, *dst, ifi_baudrate_pf);
 	dst->ifi_datalen = sizeof(struct if_data32);
 	CP(*src, *dst, ifi_mtu);
 	CP(*src, *dst, ifi_metric);
@@ -1596,6 +1649,11 @@
 		ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
 
 		copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+		/* Fixup if_data carp(4) vhid. */
+		if (carp_get_vhid_p != NULL)
+			ifm32->ifm_data.ifi_vhid =
+			    (*carp_get_vhid_p)(ifp->if_addr);
+		ifm32->ifm_data.ifi_oqdrops = ifp->if_snd.ifq_drops;
 
 		return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
 	}
@@ -1609,7 +1667,13 @@
 	ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
 
 	ifm->ifm_data = ifp->if_data;
+	/* Fixup if_data carp(4) vhid. */
+	if (carp_get_vhid_p != NULL)
+		ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr);
 
+	ifm->ifm_data.ifi_datalen += sizeof(u_long);
+	ifm->ifi_oqdrops = ifp->if_snd.ifq_drops;
+
 	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
 }
 
@@ -1629,6 +1693,10 @@
 		ifm32->ifm_index = ifp->if_index;
 
 		copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+		/* Fixup if_data carp(4) vhid. */
+		if (carp_get_vhid_p != NULL)
+			ifm32->ifm_data.ifi_vhid =
+			    (*carp_get_vhid_p)(ifp->if_addr);
 
 		return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
 	}
@@ -1639,6 +1707,9 @@
 	ifm->ifm_index = ifp->if_index;
 
 	ifm->ifm_data = ifp->if_data;
+	/* Fixup if_data carp(4) vhid. */
+	if (carp_get_vhid_p != NULL)
+		ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr);
 
 	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
 }
@@ -1664,6 +1735,9 @@
 		ifam32->ifam_metric = ifa->ifa_metric;
 
 		copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
+		/* Fixup if_data carp(4) vhid. */
+		if (carp_get_vhid_p != NULL)
+			ifam32->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);
 
 		return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len));
 	}
@@ -1679,6 +1753,9 @@
 	ifam->ifam_metric = ifa->ifa_metric;
 
 	ifam->ifam_data = ifa->if_data;
+	/* Fixup if_data carp(4) vhid. */
+	if (carp_get_vhid_p != NULL)
+		ifam->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);
 
 	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
 }
@@ -1707,7 +1784,7 @@
 	int len, error = 0;
 
 	bzero((caddr_t)&info, sizeof(info));
-	IFNET_RLOCK();
+	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
@@ -1752,7 +1829,7 @@
 done:
 	if (ifp != NULL)
 		IF_ADDR_RUNLOCK(ifp);
-	IFNET_RUNLOCK();
+	IFNET_RUNLOCK_NOSLEEP();
 	return (error);
 }
 
@@ -1766,7 +1843,7 @@
 	struct ifaddr *ifa;
 
 	bzero((caddr_t)&info, sizeof(info));
-	IFNET_RLOCK();
+	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
@@ -1801,7 +1878,7 @@
 		IF_ADDR_RUNLOCK(ifp);
 	}
 done:
-	IFNET_RUNLOCK();
+	IFNET_RUNLOCK_NOSLEEP();
 	return (error);
 }
 
@@ -1812,6 +1889,7 @@
 	u_int	namelen = arg2;
 	struct radix_node_head *rnh = NULL; /* silence compiler. */
 	int	i, lim, error = EINVAL;
+	int	fib = 0;
 	u_char	af;
 	struct	walkarg w;
 
@@ -1819,7 +1897,17 @@
 	namelen--;
 	if (req->newptr)
 		return (EPERM);
-	if (namelen != 3)
+	if (name[1] == NET_RT_DUMP) {
+		if (namelen == 3)
+			fib = req->td->td_proc->p_fibnum;
+		else if (namelen == 4)
+			fib = (name[3] == RT_ALL_FIBS) ?
+			    req->td->td_proc->p_fibnum : name[3];
+		else
+			return ((namelen < 3) ? EISDIR : ENOTDIR);
+		if (fib < 0 || fib >= rt_numfibs)
+			return (EINVAL);
+	} else if (namelen != 3)
 		return ((namelen < 3) ? EISDIR : ENOTDIR);
 	af = name[0];
 	if (af > AF_MAX)
@@ -1858,7 +1946,7 @@
 		 * take care of routing entries
 		 */
 		for (error = 0; error == 0 && i <= lim; i++) {
-			rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i);
+			rnh = rt_tables_get_rnh(fib, i);
 			if (rnh != NULL) {
 				RADIX_NODE_HEAD_RLOCK(rnh); 
 			    	error = rnh->rnh_walktree(rnh,

Added: trunk/sys/net/sff8436.h
===================================================================
--- trunk/sys/net/sff8436.h	                        (rev 0)
+++ trunk/sys/net/sff8436.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,214 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Yandex LLC.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/sff8436.h 294202 2016-01-17 05:38:03Z melifaro $
+ */
+
+/*
+ * The following set of constants are from Document SFF-8436
+ * "QSFP+ 10 Gbs 4X PLUGGABLE TRANSCEIVER" revision 4.8 dated October 31, 2013
+ *
+ * This SFF standard defines the following QSFP+ memory address module:
+ *
+ * 1) 256-byte addressable block and 128-byte pages
+ * 2) Lower 128-bytes addresses always refer to the same page
+ * 3) Upper address space may refer to different pages depending on
+ *   "page select" byte value.
+ *
+ * Map description:
+ *
+ * Serial address 0xA02:
+ *
+ * Lower bits
+ * 0-127   Monitoring data & page select byte
+ * 128-255:
+ *
+ * Page 00:
+ * 128-191 Base ID Fields
+ * 191-223 Extended ID
+ * 223-255 Vendor Specific ID
+ *
+ * Page 01 (optional):
+ * 128-255 App-specific data
+ *
+ * Page 02 (optional):
+ * 128-255 User EEPROM Data
+ *
+ * Page 03 (optional for Cable Assemblies)
+ * 128-223 Thresholds
+ * 225-237 Vendor Specific
+ * 238-253 Channel Controls/Monitor
+ * 254-255 Reserved
+ *
+ * All these values are read across an I2C (i squared C) bus.
+ */
+
+#define	SFF_8436_BASE	0xA0	/* Base address for all requests */
+
+/* Table 17 - Lower Memory Map */
+enum {
+	SFF_8436_MID		= 0,	/* Copy of SFF_8436_ID field */
+	SFF_8436_STATUS		= 1,	/* 2-bytes status (Table 18) */
+	SFF_8436_INTR_START	= 3,	/* Interrupt flags (Tables 19-21) */
+	SFF_8436_INTR_END	= 21,
+	SFF_8436_MODMON_START	= 22,	/* Module monitors (Table 22) */
+	SFF_8436_TEMP		= 22,	/* Internally measured module temp */
+	SFF_8436_VCC		= 26,	/* Internally measured module
+					* supplied voltage */
+	SFF_8436_MODMON_END	= 33,
+	SFF_8436_CHMON_START	= 34,	/* Channel monitors (Table 23) */
+	SFF_8436_RX_CH1_MSB	= 34,	/* Internally measured RX input power */
+	SFF_8436_RX_CH1_LSB	= 35,	/* for channel 1 */
+	SFF_8436_RX_CH2_MSB	= 36,	/* Internally measured RX input power */
+	SFF_8436_RX_CH2_LSB	= 37,	/* for channel 2 */
+	SFF_8436_RX_CH3_MSB	= 38,	/* Internally measured RX input power */
+	SFF_8436_RX_CH3_LSB	= 39,	/* for channel 3 */
+	SFF_8436_RX_CH4_MSB	= 40,	/* Internally measured RX input power */
+	SFF_8436_RX_CH4_LSB	= 41,	/* for channel 4 */
+	SFF_8436_TX_CH1_MSB	= 42,	/* Internally measured TX bias */
+	SFF_8436_TX_CH1_LSB	= 43,	/* for channel 1 */
+	SFF_8436_TX_CH2_MSB	= 44,	/* Internally measured TX bias */
+	SFF_8436_TX_CH2_LSB	= 45,	/* for channel 2 */
+	SFF_8436_TX_CH3_MSB	= 46,	/* Internally measured TX bias */
+	SFF_8436_TX_CH3_LSB	= 47,	/* for channel 3 */
+	SFF_8436_TX_CH4_MSB	= 48,	/* Internally measured TX bias */
+	SFF_8436_TX_CH4_LSB	= 49,	/* for channel 4 */
+	SFF_8436_CHANMON_END	= 81,
+	SFF_8436_CONTROL_START	= 86,	/* Control (Table 24) */
+	SFF_8436_CONTROL_END	= 97,
+	SFF_8436_MASKS_START	= 100,	/* Module/channel masks (Table 25) */
+	SFF_8436_MASKS_END	= 106,
+	SFF_8436_CHPASSWORD	= 119,	/* Password change entry (4 bytes) */
+	SFF_8436_PASSWORD	= 123,	/* Password entry area (4 bytes) */
+	SFF_8436_PAGESEL	= 127,	/* Page select byte */
+};
+
+/* Table 18 - Status Indicators bits */
+/* Byte 1: all bits reserved */
+
+/* Byte 2 bits */
+#define	SFF_8436_STATUS_FLATMEM	(1 << 2)	/* Upper memory flat or paged
+						* 0 = paging, 1=Page 0 only */
+#define	SFF_8436_STATUS_INTL	(1 << 1)	/* Digital state of the intL
+						* Interrupt output pin */
+#define	SFF_8436_STATUS_NOTREADY 1		/* Module has not yet achieved
+						* power up and memory data is not
+						* ready. 0=data is ready */
+/*
+ * Upper page 0 definitions:
+ * Table 29 - Serial ID: Data fields.
+ *
+ * Note that this table is mostly the same as used in SFF-8472.
+ * The only difference is address shift: +128 bytes.
+ */
+enum {
+	SFF_8436_ID		= 128,  /* Module Type (defined in sff8472.h) */
+	SFF_8436_EXT_ID		= 129,  /* Extended transceiver type
+					 * (Table 31) */
+	SFF_8436_CONNECTOR	= 130,  /* Connector type (Table 32) */
+	SFF_8436_TRANS_START	= 131,  /* Electric or Optical Compatibility
+					 * (Table 33) */
+	SFF_8436_CODE_E1040100G	= 131,	/* 10/40/100G Ethernet Compliance Code */
+	SFF_8436_CODE_SONET	= 132,	/* SONET Compliance codes */
+	SFF_8436_CODE_SATA	= 133,	/* SAS/SATA compliance codes */
+	SFF_8436_CODE_E1G	= 134,	/* Gigabit Ethernet Compliant codes */
+	SFF_8436_CODE_FC_START	= 135,	/* FC link/media/speed */
+	SFF_8436_CODE_FC_END	= 138,
+	SFF_8436_TRANS_END	= 138,
+	SFF_8436_ENCODING	= 139,	/* Encoding Code for high speed
+					* serial encoding algorithm (see
+					* Table 34) */
+	SFF_8436_BITRATE	= 140,	/* Nominal signaling rate, units
+					* of 100MBd. */
+	SFF_8436_RATEID		= 141,	/* Extended RateSelect Compliance
+					* (see Table 35) */
+	SFF_8436_LEN_SMF_KM	= 142,	/* Link length supported for single
+					* mode fiber, units of km */
+	SFF_8436_LEN_OM3	= 143,	/* Link length supported for 850nm
+					* 50um multimode fiber, units of 2 m */
+	SFF_8436_LEN_OM2	= 144, 	/* Link length supported for 50 um
+					* OM2 fiber, units of 1 m */
+	SFF_8436_LEN_OM1	= 145,	/* Link length supported for 1310 nm
+					 * 50um multi-mode fiber, units of 1m*/
+	SFF_8436_LEN_ASM	= 144, /* Link length of passive cable assembly
+					* Length is specified as in the INF
+					* 8074, units of 1m. 0 means this is
+					* not a valid assembly. Value of 255
+					* means that the Module supports length
+					* greater than 254 m. */
+	SFF_8436_DEV_TECH	= 147,	/* Device/transmitter technology,
+					* see Table 36/37 */
+	SFF_8436_VENDOR_START	= 148,	/* Vendor name, 16 bytes, padded
+					* right with 0x20 */
+	SFF_8436_VENDOR_END	= 163,
+	SFF_8436_EXTMODCODE	= 164,	/* Extended module code, Table 164 */
+	SFF_8436_VENDOR_OUI_START	= 165 , /* Vendor OUI SFP vendor IEEE
+					* company ID */
+	SFF_8436_VENDOR_OUI_END	= 167,
+	SFF_8436_PN_START 	= 168,	/* Vendor PN, padded right with 0x20 */
+	SFF_8436_PN_END 	= 183,
+	SFF_8436_REV_START 	= 184,	/* Vendor Revision, padded right 0x20 */
+	SFF_8436_REV_END 	= 185,
+	SFF_8436_WAVELEN_START	= 186,	/* Wavelength Laser wavelength
+					* (Passive/Active Cable
+					* Specification Compliance) */
+	SFF_8436_WAVELEN_END	= 189,
+	SFF_8436_MAX_CASE_TEMP	= 190,	/* Allows to specify maximum temp
+					* above 70C. Maximum case temperature is
+					* an 8-bit value in Degrees C. A value
+					* of 0 implies the standard 70C rating. */
+	SFF_8436_CC_BASE	= 191,	/* CC_BASE Check code for Base ID
+					* Fields (first 63 bytes) */
+	/* Extended ID fields */
+	SFF_8436_OPTIONS_START	= 192, /* Options Indicates which optional
+					* transceiver signals are
+					* implemented (see Table 39) */
+	SFF_8436_OPTIONS_END	= 195,
+	SFF_8436_SN_START 	= 196,	/* Vendor SN, right padded with 0x20 */
+	SFF_8436_SN_END 	= 211,
+	SFF_8436_DATE_START	= 212,	/* Vendor’s manufacturing date code
+					* (see Table 40) */
+	SFF_8436_DATE_END	= 219,
+	SFF_8436_DIAG_TYPE	= 220,	/* Diagnostic Monitoring Type
+					* Indicates which type of
+					* diagnostic monitoring is
+					* implemented (if any) in the
+					* transceiver (see Table 41) */
+
+	SFF_8436_ENHANCED	= 221,	/* Enhanced Options Indicates which
+					* optional features are implemented
+					* (if any) in the transceiver
+					* (see Table 42) */
+	SFF_8636_BITRATE	= 222,	/* Nominal bit rate per channel, units
+					* of 250 Mbps */
+	SFF_8436_CC_EXT		= 223,	/* Check code for the Extended ID
+					* Fields (bytes 192-222 incl) */
+	SFF_8436_VENDOR_RSRVD_START	= 224,
+	SFF_8436_VENDOR_RSRVD_END	= 255,
+};
+
+


Property changes on: trunk/sys/net/sff8436.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/sff8472.h
===================================================================
--- trunk/sys/net/sff8472.h	                        (rev 0)
+++ trunk/sys/net/sff8472.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,509 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 George V. Neville-Neil
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/sff8472.h 286810 2015-08-15 17:52:55Z melifaro $
+ */
+
+/*
+ * The following set of constants are from Document SFF-8472
+ * "Diagnostic Monitoring Interface for Optical Transceivers" revision
+ * 11.3 published by the SFF Committee on June 11, 2013
+ *
+ * The SFF standard defines two ranges of addresses, each 255 bytes
+ * long for the storage of data and diagnostics on cables, such as
+ * SFP+ optics and TwinAx cables.  The ranges are defined in the
+ * following way:
+ *
+ * Base Address 0xa0 (Identification Data)
+ * 0-95    Serial ID Defined by SFP MSA
+ * 96-127  Vendor Specific Data
+ * 128-255 Reserved
+ *
+ * Base Address 0xa2 (Diagnostic Data)
+ * 0-55    Alarm and Warning Thresholds
+ * 56-95   Cal Constants
+ * 96-119  Real Time Diagnostic Interface
+ * 120-127 Vendor Specific
+ * 128-247 User Writable EEPROM
+ * 248-255 Vendor Specific
+ *
+ * Note that not all addresses are supported.  Where support is
+ * optional this is noted and instructions for checking for the
+ * support are supplied.
+ *
+ * All these values are read across an I2C (i squared C) bus.  Any
+ * device wishing to read these addresses must first have support for
+ * i2c calls.  The Chelsio T4/T5 driver (dev/cxgbe) is one such
+ * driver.
+ */
+
+
+/* Table 3.1 Two-wire interface ID: Data Fields */
+
+enum {
+	SFF_8472_BASE 		= 0xa0,   /* Base address for all our queries. */
+	SFF_8472_ID		= 0,  /* Transceiver Type (Table 3.2) */
+	SFF_8472_EXT_ID		= 1,  /* Extended transceiver type (Table 3.3) */
+	SFF_8472_CONNECTOR	= 2,  /* Connector type (Table 3.4) */
+	SFF_8472_TRANS_START	= 3,  /* Elec or Optical Compatibility
+				    * (Table 3.5) */
+	SFF_8472_TRANS_END	= 10,
+	SFF_8472_ENCODING	= 11, /* Encoding Code for high speed
+				     * serial encoding algorithm (see
+				     * Table 3.6) */
+	SFF_8472_BITRATE	= 12, /* Nominal signaling rate, units
+				     * of 100MBd.  (see details for
+				     * rates > 25.0Gb/s) */
+	SFF_8472_RATEID		= 13, /* Type of rate select
+				     * functionality (see Table
+				     * 3.6a) */
+	SFF_8472_LEN_SMF_KM	= 14, /* Link length supported for single
+				    * mode fiber, units of km */
+	SFF_8472_LEN_SMF	= 15, /* Link length supported for single
+				    * mode fiber, units of 100 m */
+	SFF_8472_LEN_50UM	= 16, /* Link length supported for 50 um
+				    * OM2 fiber, units of 10 m */
+	SFF_8472_LEN_625UM	= 17, /* Link length supported for 62.5
+				    * um OM1 fiber, units of 10 m */
+	SFF_8472_LEN_OM4	= 18, /* Link length supported for 50um
+				    * OM4 fiber, units of 10m.
+				    * Alternatively copper or direct
+				    * attach cable, units of m */
+	SFF_8472_LEN_OM3	= 19, /* Link length supported for 50 um OM3 fiber, units of 10 m */
+	SFF_8472_VENDOR_START 	= 20, /* Vendor name [Address A0h, Bytes
+				    * 20-35] */
+	SFF_8472_VENDOR_END 	= 35,
+	SFF_8472_TRANS		= 36, /* Transceiver Code for electronic
+				    * or optical compatibility (see
+				    * Table 3.5) */
+	SFF_8472_VENDOR_OUI_START	= 37, /* Vendor OUI SFP vendor IEEE
+				    * company ID */
+	SFF_8472_VENDOR_OUI_END	= 39,
+	SFF_8472_PN_START 	= 40, /* Vendor PN */
+	SFF_8472_PN_END 	= 55,
+	SFF_8472_REV_START 	= 56, /* Vendor Revision */
+	SFF_8472_REV_END 	= 59,
+	SFF_8472_WAVELEN_START	= 60, /* Wavelength Laser wavelength
+				    * (Passive/Active Cable
+				    * Specification Compliance) */
+	SFF_8472_WAVELEN_END	= 61,
+	SFF_8472_CC_BASE	= 63, /* CC_BASE Check code for Base ID
+				    * Fields (addresses 0 to 62) */
+
+/*
+ * Extension Fields (optional) check the options before reading other
+ * addresses.
+ */
+	SFF_8472_OPTIONS_MSB	= 64, /* Options Indicates which optional
+				    * transceiver signals are
+				    * implemented */
+	SFF_8472_OPTIONS_LSB	= 65, /* (see Table 3.7) */
+	SFF_8472_BR_MAX		= 66, /* BR max Upper bit rate margin,
+				    * units of % (see details for
+				    * rates > 25.0Gb/s) */
+	SFF_8472_BR_MIN		= 67, /* Lower bit rate margin, units of
+				    * % (see details for rates >
+				    * 25.0Gb/s) */
+	SFF_8472_SN_START 	= 68, /* Vendor SN [Address A0h, Bytes 68-83] */
+	SFF_8472_SN_END 	= 83,
+	SFF_8472_DATE_START	= 84, /* Date code Vendor’s manufacturing
+				    * date code (see Table 3.8) */
+	SFF_8472_DATE_END	= 91,
+	SFF_8472_DIAG_TYPE	= 92, /* Diagnostic Monitoring Type
+				    * Indicates which type of
+				    * diagnostic monitoring is
+				    * implemented (if any) in the
+				    * transceiver (see Table 3.9)
+				    */
+
+	SFF_8472_ENHANCED	= 93, /* Enhanced Options Indicates which
+				    * optional enhanced features are
+				    * implemented (if any) in the
+				    * transceiver (see Table 3.10) */
+	SFF_8472_COMPLIANCE	= 94, /* SFF-8472 Compliance Indicates
+				    * which revision of SFF-8472 the
+				    * transceiver complies with.  (see
+				    * Table 3.12)*/
+	SFF_8472_CC_EXT		= 95, /* Check code for the Extended ID
+				    * Fields (addresses 64 to 94)
+				    */
+
+	SFF_8472_VENDOR_RSRVD_START	= 96,
+	SFF_8472_VENDOR_RSRVD_END	= 127,
+
+	SFF_8472_RESERVED_START	= 128,
+	SFF_8472_RESERVED_END	= 255
+};
+
+#define SFF_8472_DIAG_IMPL	(1 << 6) /* Required to be 1 */
+#define SFF_8472_DIAG_INTERNAL	(1 << 5) /* Internal measurements. */
+#define SFF_8472_DIAG_EXTERNAL	(1 << 4) /* External measurements. */
+#define SFF_8472_DIAG_POWER	(1 << 3) /* Power measurement type */
+#define SFF_8472_DIAG_ADDR_CHG	(1 << 2) /* Address change required.
+					  * See SFF-8472 doc. */
+
+ /*
+  * Diagnostics are available at the two wire address 0xa2.  All
+  * diagnostics are OPTIONAL so you should check 0xa0 register 92 to
+  * see which, if any are supported.
+  */
+
+enum {SFF_8472_DIAG = 0xa2};  /* Base address for diagnostics. */
+
+ /*
+  *  Table 3.15 Alarm and Warning Thresholds All values are 2 bytes
+  * and MUST be read in a single read operation starting at the MSB
+  */
+
+enum {
+	SFF_8472_TEMP_HIGH_ALM		= 0, /* Temp High Alarm  */
+	SFF_8472_TEMP_LOW_ALM		= 2, /* Temp Low Alarm */
+	SFF_8472_TEMP_HIGH_WARN		= 4, /* Temp High Warning */
+	SFF_8472_TEMP_LOW_WARN		= 6, /* Temp Low Warning */
+	SFF_8472_VOLTAGE_HIGH_ALM	= 8, /* Voltage High Alarm */
+	SFF_8472_VOLTAGE_LOW_ALM	= 10, /* Voltage Low Alarm */
+	SFF_8472_VOLTAGE_HIGH_WARN	= 12, /* Voltage High Warning */
+	SFF_8472_VOLTAGE_LOW_WARN	= 14, /* Voltage Low Warning */
+	SFF_8472_BIAS_HIGH_ALM		= 16, /* Bias High Alarm */
+	SFF_8472_BIAS_LOW_ALM		= 18, /* Bias Low Alarm */
+	SFF_8472_BIAS_HIGH_WARN		= 20, /* Bias High Warning */
+	SFF_8472_BIAS_LOW_WARN		= 22, /* Bias Low Warning */
+	SFF_8472_TX_POWER_HIGH_ALM	= 24, /* TX Power High Alarm */
+	SFF_8472_TX_POWER_LOW_ALM	= 26, /* TX Power Low Alarm */
+	SFF_8472_TX_POWER_HIGH_WARN	= 28, /* TX Power High Warning */
+	SFF_8472_TX_POWER_LOW_WARN	= 30, /* TX Power Low Warning */
+	SFF_8472_RX_POWER_HIGH_ALM	= 32, /* RX Power High Alarm */
+	SFF_8472_RX_POWER_LOW_ALM	= 34, /* RX Power Low Alarm */
+	SFF_8472_RX_POWER_HIGH_WARN	= 36, /* RX Power High Warning */
+	SFF_8472_RX_POWER_LOW_WARN	= 38, /* RX Power Low Warning */
+
+	SFF_8472_RX_POWER4	= 56, /* Rx_PWR(4) Single precision
+				    *  floating point calibration data
+				    *  - Rx optical power. Bit 7 of
+				    *  byte 56 is MSB. Bit 0 of byte
+				    *  59 is LSB. Rx_PWR(4) should be
+				    *  set to zero for “internally
+				    *  calibrated” devices. */
+	SFF_8472_RX_POWER3	= 60, /* Rx_PWR(3) Single precision
+				    * floating point calibration data
+				    * - Rx optical power.  Bit 7 of
+				    * byte 60 is MSB. Bit 0 of byte 63
+				    * is LSB. Rx_PWR(3) should be set
+				    * to zero for “internally
+				    * calibrated” devices.*/
+	SFF_8472_RX_POWER2	= 64, /* Rx_PWR(2) Single precision
+				    * floating point calibration data,
+				    * Rx optical power.  Bit 7 of byte
+				    * 64 is MSB, bit 0 of byte 67 is
+				    * LSB. Rx_PWR(2) should be set to
+				    * zero for “internally calibrated”
+				    * devices. */
+	SFF_8472_RX_POWER1	= 68, /* Rx_PWR(1) Single precision
+				    * floating point calibration data,
+				    * Rx optical power. Bit 7 of byte
+				    * 68 is MSB, bit 0 of byte 71 is
+				    * LSB. Rx_PWR(1) should be set to
+				    * 1 for “internally calibrated”
+				    * devices. */
+	SFF_8472_RX_POWER0	= 72, /* Rx_PWR(0) Single precision
+				    * floating point calibration data,
+				    * Rx optical power. Bit 7 of byte
+				    * 72 is MSB, bit 0 of byte 75 is
+				    * LSB. Rx_PWR(0) should be set to
+				    * zero for “internally calibrated”
+				    * devices. */
+	SFF_8472_TX_I_SLOPE	= 76, /* Tx_I(Slope) Fixed decimal
+				    * (unsigned) calibration data,
+				    * laser bias current. Bit 7 of
+				    * byte 76 is MSB, bit 0 of byte 77
+				    * is LSB. Tx_I(Slope) should be
+				    * set to 1 for “internally
+				    * calibrated” devices. */
+	SFF_8472_TX_I_OFFSET	= 78, /* Tx_I(Offset) Fixed decimal
+				    * (signed two’s complement)
+				    * calibration data, laser bias
+				    * current. Bit 7 of byte 78 is
+				    * MSB, bit 0 of byte 79 is
+				    * LSB. Tx_I(Offset) should be set
+				    * to zero for “internally
+				    * calibrated” devices. */
+	SFF_8472_TX_POWER_SLOPE	= 80, /* Tx_PWR(Slope) Fixed decimal
+				    * (unsigned) calibration data,
+				    * transmitter coupled output
+				    * power. Bit 7 of byte 80 is MSB,
+				    * bit 0 of byte 81 is LSB.
+				    * Tx_PWR(Slope) should be set to 1
+				    * for “internally calibrated”
+				    * devices. */
+	SFF_8472_TX_POWER_OFFSET	= 82, /* Tx_PWR(Offset) Fixed decimal
+					    * (signed two’s complement)
+					    * calibration data, transmitter
+					    * coupled output power. Bit 7 of
+					    * byte 82 is MSB, bit 0 of byte 83
+					    * is LSB. Tx_PWR(Offset) should be
+					    * set to zero for “internally
+					    * calibrated” devices. */
+	SFF_8472_T_SLOPE	= 84, /* T (Slope) Fixed decimal
+				    * (unsigned) calibration data,
+				    * internal module temperature. Bit
+				    * 7 of byte 84 is MSB, bit 0 of
+				    * byte 85 is LSB.  T(Slope) should
+				    * be set to 1 for “internally
+				    * calibrated” devices. */
+	SFF_8472_T_OFFSET	= 86, /* T (Offset) Fixed decimal (signed
+				    * two’s complement) calibration
+				    * data, internal module
+				    * temperature. Bit 7 of byte 86 is
+				    * MSB, bit 0 of byte 87 is LSB.
+				    * T(Offset) should be set to zero
+				    * for “internally calibrated”
+				    * devices. */
+	SFF_8472_V_SLOPE	= 88, /* V (Slope) Fixed decimal
+				    * (unsigned) calibration data,
+				    * internal module supply
+				    * voltage. Bit 7 of byte 88 is
+				    * MSB, bit 0 of byte 89 is
+				    * LSB. V(Slope) should be set to 1
+				    * for “internally calibrated”
+				    * devices. */
+	SFF_8472_V_OFFSET	= 90, /* V (Offset) Fixed decimal (signed
+				    * two’s complement) calibration
+				    * data, internal module supply
+				    * voltage. Bit 7 of byte 90 is
+				    * MSB. Bit 0 of byte 91 is
+				    * LSB. V(Offset) should be set to
+				    * zero for “internally calibrated”
+				    * devices. */
+	SFF_8472_CHECKSUM	= 95, /* Checksum Byte 95 contains the
+				    * low order 8 bits of the sum of
+				    * bytes 0 – 94. */
+	/* Internal measurements. */
+
+	SFF_8472_TEMP	 	= 96, /* Internally measured module temperature. */
+	SFF_8472_VCC 		= 98, /* Internally measured supply
+				    * voltage in transceiver.
+				    */
+	SFF_8472_TX_BIAS	= 100, /* Internally measured TX Bias Current. */
+	SFF_8472_TX_POWER	= 102, /* Measured TX output power. */
+	SFF_8472_RX_POWER	= 104, /* Measured RX input power. */
+
+	SFF_8472_STATUS		= 110 /* See below */
+};
+ /* Status Bits Described */
+
+/*
+ * TX Disable State Digital state of the TX Disable Input Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_DISABLE  (1 << 7) 
+
+/*
+ * Select Read/write bit that allows software disable of
+ * laser. Writing ‘1’ disables laser. See Table 3.11 for
+ * enable/disable timing requirements. This bit is “OR”d with the hard
+ * TX_DISABLE pin value. Note, per SFP MSA TX_DISABLE pin is default
+ * enabled unless pulled low by hardware. If Soft TX Disable is not
+ * implemented, the transceiver ignores the value of this bit. Default
+ * power up value is zero/low.
+ */
+#define SFF_8472_STATUS_SOFT_TX_DISABLE (1 << 6) 
+
+/*
+ * RS(1) State Digital state of SFP input pin AS(1) per SFF-8079 or
+ * RS(1) per SFF-8431. Updated within 100ms of change on pin. See A2h
+ * Byte 118, Bit 3 for Soft RS(1) Select control information.
+ */
+#define SFF_8472_RS_STATE (1 << 5) 
+
+/*
+ * Rate_Select State [aka. “RS(0)”] Digital state of the SFP
+ * Rate_Select Input Pin. Updated within 100ms of change on pin. Note:
+ * This pin is also known as AS(0) in SFF-8079 and RS(0) in SFF-8431.
+ */ 
+#define SFF_8472_STATUS_SELECT_STATE (1 << 4)
+     
+/*
+ * Read/write bit that allows software rate select control. Writing
+ * ‘1’ selects full bandwidth operation. This bit is “OR’d with the
+ * hard Rate_Select, AS(0) or RS(0) pin value. See Table 3.11 for
+ * timing requirements. Default at power up is logic zero/low. If Soft
+ * Rate Select is not implemented, the transceiver ignores the value
+ * of this bit. Note: Specific transceiver behaviors of this bit are
+ * identified in Table 3.6a and referenced documents. See Table 3.18a,
+ * byte 118, bit 3 for Soft RS(1) Select.
+ */
+#define SFF_8472_STATUS_SOFT_RATE_SELECT (1 << 3)
+
+/*
+ * TX Fault State Digital state of the TX Fault Output Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_FAULT_STATE (1 << 2)
+
+/*
+ * Digital state of the RX_LOS Output Pin. Updated within 100ms of
+ * change on pin.
+ */
+#define SFF_8472_STATUS_RX_LOS (1 << 1)
+
+/*
+ * Indicates transceiver has achieved power up and data is ready. Bit
+ * remains high until data is ready to be read at which time the
+ * device sets the bit low.
+ */
+#define SFF_8472_STATUS_DATA_READY (1 << 0)
+
+/*
+ * Table 3.2 Identifier values.
+ * Identifier constants has taken from SFF-8024 rev 2.9 table 4.1
+ * (as referenced by table 3.2 footer)
+ * */
+enum {
+	SFF_8024_ID_UNKNOWN	= 0x0, /* Unknown or unspecified */
+	SFF_8024_ID_GBIC	= 0x1, /* GBIC */
+	SFF_8024_ID_SFF		= 0x2, /* Module soldered to motherboard (ex: SFF)*/
+	SFF_8024_ID_SFP		= 0x3, /* SFP or SFP “Plus” */
+	SFF_8024_ID_XBI		= 0x4, /* 300 pin XBI */
+	SFF_8024_ID_XENPAK	= 0x5, /* Xenpak */
+	SFF_8024_ID_XFP		= 0x6, /* XFP */
+	SFF_8024_ID_XFF		= 0x7, /* XFF */
+	SFF_8024_ID_XFPE	= 0x8, /* XFP-E */
+	SFF_8024_ID_XPAK	= 0x9, /* XPAK */
+	SFF_8024_ID_X2		= 0xA, /* X2 */
+	SFF_8024_ID_DWDM_SFP	= 0xB, /* DWDM-SFP */
+	SFF_8024_ID_QSFP	= 0xC, /* QSFP */
+	SFF_8024_ID_QSFPPLUS	= 0xD, /* QSFP+ */
+	SFF_8024_ID_CXP		= 0xE, /* CXP */
+	SFF_8024_ID_HD4X	= 0xF, /* Shielded Mini Multilane HD 4X */ 
+	SFF_8024_ID_HD8X	= 0x10, /* Shielded Mini Multilane HD 8X */ 
+	SFF_8024_ID_QSFP28	= 0x11, /* QSFP28 */
+	SFF_8024_ID_CXP2	= 0x12, /* CXP2 (aka CXP28) */
+	SFF_8024_ID_CDFP	= 0x13, /* CDFP (Style 1/Style 2) */
+	SFF_8024_ID_SMM4	= 0x14, /* Shielded Mini Multilane HD 4X Fanout */
+	SFF_8024_ID_SMM8	= 0x15, /* Shielded Mini Multilane HD 8X Fanout */
+	SFF_8024_ID_CDFP3	= 0x16, /* CDFP (Style3) */
+	SFF_8024_ID_LAST	= SFF_8024_ID_CDFP3
+	};
+
+static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown",
+					     "GBIC",
+					     "SFF",
+					     "SFP/SFP+/SFP28",
+					     "XBI",
+					     "Xenpak",
+					     "XFP",
+					     "XFF",
+					     "XFP-E",
+					     "XPAK",
+					     "X2",
+					     "DWDM-SFP/SFP+",
+					     "QSFP",
+					     "QSFP+",
+					     "CXP",
+					     "HD4X",
+					     "HD8X",
+					     "QSFP28",
+					     "CXP2",
+					     "CDFP",
+					     "SMM4",
+					     "SMM8",
+					     "CDFP3"};
+
+/* Keep compatibility with old definitions */
+#define	SFF_8472_ID_UNKNOWN	SFF_8024_ID_UNKNOWN
+#define	SFF_8472_ID_GBIC	SFF_8024_ID_GBIC
+#define	SFF_8472_ID_SFF		SFF_8024_ID_SFF
+#define	SFF_8472_ID_SFP		SFF_8024_ID_SFP
+#define	SFF_8472_ID_XBI		SFF_8024_ID_XBI
+#define	SFF_8472_ID_XENPAK	SFF_8024_ID_XENPAK
+#define	SFF_8472_ID_XFP		SFF_8024_ID_XFP
+#define	SFF_8472_ID_XFF		SFF_8024_ID_XFF
+#define	SFF_8472_ID_XFPE	SFF_8024_ID_XFPE
+#define	SFF_8472_ID_XPAK	SFF_8024_ID_XPAK
+#define	SFF_8472_ID_X2		SFF_8024_ID_X2
+#define	SFF_8472_ID_DWDM_SFP	SFF_8024_ID_DWDM_SFP
+#define	SFF_8472_ID_QSFP	SFF_8024_ID_QSFP
+#define	SFF_8472_ID_LAST	SFF_8024_ID_LAST
+
+#define	sff_8472_id		sff_8024_id
+
+/*
+ * Table 3.9 Diagnostic Monitoring Type (byte 92)
+ * bits described.
+ */
+
+/*
+ * Digital diagnostic monitoring implemented.
+ * Set to 1 for transceivers implementing DDM.
+ */
+#define	SFF_8472_DDM_DONE	(1 << 6)
+
+/*
+ * Measurements are internally calibrated.
+ */
+#define	SFF_8472_DDM_INTERNAL	(1 << 5)
+
+/*
+ * Measurements are externally calibrated.
+ */
+#define	SFF_8472_DDM_EXTERNAL	(1 << 4)
+
+/*
+ * Received power measurement type
+ * 0 = OMA, 1 = average power
+ */
+#define	SFF_8472_DDM_PMTYPE	(1 << 3)
+
+/* Table 3.13 and 3.14 Temperature Conversion Values */
+#define SFF_8472_TEMP_SIGN (1 << 15)
+#define SFF_8472_TEMP_SHIFT  8
+#define SFF_8472_TEMP_MSK  0xEF00
+#define SFF_8472_TEMP_FRAC 0x00FF
+
+/* Internal Calibration Conversion factors */
+
+/*
+ * Represented as a 16 bit unsigned integer with the voltage defined
+ * as the full 16 bit value (0 – 65535) with LSB equal to 100 uVolt,
+ * yielding a total range of 0 to +6.55 Volts.
+ */
+#define SFF_8472_VCC_FACTOR 10000.0 
+
+/*
+ * Represented as a 16 bit unsigned integer with the current defined
+ * as the full 16 bit value (0 – 65535) with LSB equal to 2 uA,
+ * yielding a total range of 0 to 131 mA.
+ */
+
+#define SFF_8472_BIAS_FACTOR 2000.0 
+
+/*
+ * Represented as a 16 bit unsigned integer with the power defined as
+ * the full 16 bit value (0 – 65535) with LSB equal to 0.1 uW,
+ * yielding a total range of 0 to 6.5535 mW (~ -40 to +8.2 dBm).
+ */
+
+#define SFF_8472_POWER_FACTOR 10000.0


Property changes on: trunk/sys/net/sff8472.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/slcompress.c
===================================================================
--- trunk/sys/net/slcompress.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/slcompress.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)slcompress.c	8.2 (Berkeley) 4/16/94
- * $FreeBSD: stable/9/sys/net/slcompress.c 128019 2004-04-07 20:46:16Z imp $
+ * $FreeBSD: stable/10/sys/net/slcompress.c 128019 2004-04-07 20:46:16Z imp $
  */
 
 /*

Modified: trunk/sys/net/slcompress.h
===================================================================
--- trunk/sys/net/slcompress.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/slcompress.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -32,7 +32,7 @@
  *
  *	Van Jacobson (van at helios.ee.lbl.gov), Dec 31, 1989:
  *	- Initial distribution.
- * $FreeBSD: stable/9/sys/net/slcompress.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/slcompress.h 139823 2005-01-07 01:45:51Z imp $
  */
 
 #ifndef _NET_SLCOMPRESS_H_

Modified: trunk/sys/net/vnet.c
===================================================================
--- trunk/sys/net/vnet.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/vnet.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/vnet.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/net/vnet.c 262734 2014-03-04 14:01:12Z glebius $");
 
 #include "opt_ddb.h"
 #include "opt_kdb.h"
@@ -211,14 +211,14 @@
 static struct sx vnet_data_free_lock;
 
 SDT_PROVIDER_DEFINE(vnet);
-SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, entry, "int");
-SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, alloc, alloc, "int",
+SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, "int");
+SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, alloc, "int",
     "struct vnet *");
-SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return, return,
+SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return,
     "int", "struct vnet *");
-SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry, entry,
+SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry,
     "int", "struct vnet *");
-SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return, entry,
+SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return,
     "int");
 
 #ifdef DDB
@@ -466,47 +466,6 @@
 }
 
 /*
- * Variants on sysctl_handle_foo that know how to handle virtualized global
- * variables: if 'arg1' is a pointer, then we transform it to the local vnet
- * offset.
- */
-int
-vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS)
-{
-
-	if (arg1 != NULL)
-		arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
-	return (sysctl_handle_int(oidp, arg1, arg2, req));
-}
-
-int
-vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
-{
-
-	if (arg1 != NULL)
-		arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
-	return (sysctl_handle_opaque(oidp, arg1, arg2, req));
-}
-
-int
-vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS)
-{
-
-	if (arg1 != NULL)
-		arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
-	return (sysctl_handle_string(oidp, arg1, arg2, req));
-}
-
-int
-vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS)
-{
-
-	if (arg1 != NULL)
-		arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
-	return (sysctl_handle_int(oidp, arg1, arg2, req));
-}
-
-/*
  * Support for special SYSINIT handlers registered via VNET_SYSINIT()
  * and VNET_SYSUNINIT().
  */

Modified: trunk/sys/net/vnet.h
===================================================================
--- trunk/sys/net/vnet.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/vnet.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -33,7 +33,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/net/vnet.h 218567 2011-02-11 14:17:58Z bz $
+ * $FreeBSD: stable/10/sys/net/vnet.h 262735 2014-03-04 14:05:37Z glebius $
  */
 
 /*-
@@ -86,6 +86,56 @@
 
 #ifdef _KERNEL
 
+#define	VNET_PCPUSTAT_DECLARE(type, name)	\
+    VNET_DECLARE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define	VNET_PCPUSTAT_DEFINE(type, name)	\
+    VNET_DEFINE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define	VNET_PCPUSTAT_ALLOC(name, wait)	\
+    COUNTER_ARRAY_ALLOC(VNET(name), \
+	sizeof(VNET(name)) / sizeof(counter_u64_t), (wait))
+
+#define	VNET_PCPUSTAT_FREE(name)	\
+    COUNTER_ARRAY_FREE(VNET(name), sizeof(VNET(name)) / sizeof(counter_u64_t))
+
+#define	VNET_PCPUSTAT_ADD(type, name, f, v)	\
+    counter_u64_add(VNET(name)[offsetof(type, f) / sizeof(uint64_t)], (v))
+
+#define	VNET_PCPUSTAT_SYSINIT(name)	\
+static void				\
+vnet_##name##_init(const void *unused)	\
+{					\
+	VNET_PCPUSTAT_ALLOC(name, M_WAITOK);	\
+}					\
+VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_PROTO_IFATTACHDOMAIN,	\
+    SI_ORDER_ANY, vnet_ ## name ## _init, NULL)
+
+#define	VNET_PCPUSTAT_SYSUNINIT(name)					\
+static void								\
+vnet_##name##_uninit(const void *unused)				\
+{									\
+	VNET_PCPUSTAT_FREE(name);					\
+}									\
+VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_PROTO_IFATTACHDOMAIN,	\
+    SI_ORDER_ANY, vnet_ ## name ## _uninit, NULL)
+
+#define	SYSCTL_VNET_PCPUSTAT(parent, nbr, name, type, array, desc)	\
+static int								\
+array##_sysctl(SYSCTL_HANDLER_ARGS)					\
+{									\
+	type s;								\
+	CTASSERT((sizeof(type) / sizeof(uint64_t)) ==			\
+	    (sizeof(VNET(array)) / sizeof(counter_u64_t)));		\
+	COUNTER_ARRAY_COPY(VNET(array), &s, sizeof(type) / sizeof(uint64_t));\
+	if (req->newptr)						\
+		COUNTER_ARRAY_ZERO(VNET(array),				\
+		    sizeof(type) / sizeof(uint64_t));			\
+	return (SYSCTL_OUT(req, &s, sizeof(type)));			\
+}									\
+SYSCTL_VNET_PROC(parent, nbr, name, CTLTYPE_OPAQUE | CTLFLAG_RW, NULL,	\
+    0, array ## _sysctl, "I", desc)
+
 #ifdef VIMAGE
 #include <sys/lock.h>
 #include <sys/proc.h>			/* for struct thread */
@@ -241,15 +291,10 @@
  * arguments themselves, if required.
  */
 #ifdef SYSCTL_OID
-int	vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS);
-int	vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
-int	vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS);
-int	vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS);
-
 #define	SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr)	\
 	SYSCTL_OID(parent, nbr, name,					\
 	    CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access),		\
-	    ptr, val, vnet_sysctl_handle_int, "I", descr)
+	    ptr, val, sysctl_handle_int, "I", descr)
 #define	SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler,	\
 	    fmt, descr)							\
 	CTASSERT(((access) & CTLTYPE) != 0);				\
@@ -259,20 +304,20 @@
 	    descr)							\
 	SYSCTL_OID(parent, nbr, name,					\
 	    CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, len, 		\
-	    vnet_sysctl_handle_opaque, fmt, descr)
+	    sysctl_handle_opaque, fmt, descr)
 #define	SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr)	\
 	SYSCTL_OID(parent, nbr, name,					\
 	    CTLTYPE_STRING|CTLFLAG_VNET|(access),			\
-	    arg, len, vnet_sysctl_handle_string, "A", descr)
+	    arg, len, sysctl_handle_string, "A", descr)
 #define	SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr)	\
 	SYSCTL_OID(parent, nbr, name,					\
 	    CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr,			\
-	    sizeof(struct type), vnet_sysctl_handle_opaque, "S," #type,	\
+	    sizeof(struct type), sysctl_handle_opaque, "S," #type,	\
 	    descr)
 #define	SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr)	\
 	SYSCTL_OID(parent, nbr, name,					\
 	    CTLTYPE_UINT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access),		\
-	    ptr, val, vnet_sysctl_handle_uint, "IU", descr)
+	    ptr, val, sysctl_handle_int, "IU", descr)
 #define	VNET_SYSCTL_ARG(req, arg1) do {					\
 	if (arg1 != NULL)						\
 		arg1 = (void *)(TD_TO_VNET((req)->td)->vnet_data_base +	\

Modified: trunk/sys/net/zlib.c
===================================================================
--- trunk/sys/net/zlib.c	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/zlib.c	2018-05-25 20:05:59 UTC (rev 9938)
@@ -11,7 +11,7 @@
  * - added inflateIncomp and deflateOutputPending
  * - allow strm->next_out to be NULL, meaning discard the output
  *
- * $FreeBSD: stable/9/sys/net/zlib.c 149993 2005-09-11 16:13:02Z rodrigc $
+ * $FreeBSD: stable/10/sys/net/zlib.c 245102 2013-01-06 14:59:59Z peter $
  */
 
 /* 
@@ -26,7 +26,14 @@
 #define MY_ZCALLOC
 
 #if defined(__FreeBSD__) && defined(_KERNEL)
-#define inflate	inflate_ppp	/* FreeBSD already has an inflate :-( */
+#define	_tr_init		_zlib104_tr_init
+#define	_tr_align		_zlib104_tr_align
+#define	_tr_tally		_zlib104_tr_tally
+#define	_tr_flush_block		_zlib104_tr_flush_block
+#define	_tr_stored_block	_zlib104_tr_stored_block
+#define	inflate_fast		_zlib104_inflate_fast
+#define	inflate			_zlib104_inflate
+#define	zlibVersion		_zlib104_Version
 #endif
 
 

Modified: trunk/sys/net/zlib.h
===================================================================
--- trunk/sys/net/zlib.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/zlib.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/zlib.h 204552 2010-03-02 06:58:58Z alfred $	*/
+/* $FreeBSD: stable/10/sys/net/zlib.h 245102 2013-01-06 14:59:59Z peter $	*/
 
 /*
  * This file is derived from zlib.h and zconf.h from the zlib-1.0.4
@@ -110,7 +110,7 @@
 #if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32)
 #  define WIN32
 #endif
-#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(i386)
+#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(__i386__)
 #  ifndef __32BIT__
 #    define __32BIT__
 #  endif
@@ -512,7 +512,7 @@
 */
 
 #if defined(__FreeBSD__) && defined(_KERNEL)
-#define inflate       inflate_ppp     /* FreeBSD already has an inflate :-( */
+#define inflate       _zlib104_inflate     /* FreeBSD already has an inflate :-( */
 #endif
 
 extern int EXPORT inflate OF((z_streamp strm, int flush));

Modified: trunk/sys/net/zutil.h
===================================================================
--- trunk/sys/net/zutil.h	2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/zutil.h	2018-05-25 20:05:59 UTC (rev 9938)
@@ -10,7 +10,7 @@
  */
 
 /* From: zutil.h,v 1.16 1996/07/24 13:41:13 me Exp $ */
-/* $FreeBSD: stable/9/sys/net/zutil.h 204552 2010-03-02 06:58:58Z alfred $ */
+/* $FreeBSD: stable/10/sys/net/zutil.h 204552 2010-03-02 06:58:58Z alfred $ */
 
 #ifndef _Z_UTIL_H
 #define _Z_UTIL_H



More information about the Midnightbsd-cvs mailing list