[Midnightbsd-cvs] src: sys/net:

laffer1 at midnightbsd.org
Sun Sep 21 11:36:58 EDT 2008


Log Message:
-----------


Modified Files:
--------------
    src/sys/net:
        bpf.c (r1.1.1.1 -> r1.2)
        bpf.h (r1.1.1.1 -> r1.2)
        bpf_filter.c (r1.1.1.1 -> r1.2)
        bpfdesc.h (r1.1.1.1 -> r1.2)
        bridgestp.c (r1.2 -> r1.3)
        bsd_comp.c (r1.1.1.1 -> r1.2)
        ethernet.h (r1.1.1.1 -> r1.2)
        fddi.h (r1.1.1.1 -> r1.2)
        if.c (r1.1.1.2 -> r1.2)
        if.h (r1.1.1.2 -> r1.2)
        if_arc.h (r1.1.1.1 -> r1.2)
        if_arcsubr.c (r1.1.1.1 -> r1.2)
        if_arp.h (r1.1.1.1 -> r1.2)
        if_atm.h (r1.1.1.1 -> r1.2)
        if_atmsubr.c (r1.1.1.1 -> r1.2)
        if_bridge.c (r1.2 -> r1.3)
        if_bridgevar.h (r1.2 -> r1.3)
        if_clone.c (r1.1.1.1 -> r1.2)
        if_clone.h (r1.1.1.1 -> r1.2)
        if_disc.c (r1.1.1.1 -> r1.2)
        if_ef.c (r1.1.1.2 -> r1.2)
        if_ethersubr.c (r1.3 -> r1.4)
        if_faith.c (r1.1.1.1 -> r1.2)
        if_fddisubr.c (r1.1.1.1 -> r1.2)
        if_fwsubr.c (r1.1.1.2 -> r1.2)
        if_gif.c (r1.1.1.2 -> r1.2)
        if_gif.h (r1.1.1.2 -> r1.2)
        if_gre.c (r1.1.1.2 -> r1.2)
        if_iso88025subr.c (r1.1.1.1 -> r1.2)
        if_llc.h (r1.1.1.1 -> r1.2)
        if_loop.c (r1.2 -> r1.3)
        if_media.c (r1.2 -> r1.3)
        if_media.h (r1.3 -> r1.4)
        if_mib.c (r1.1.1.2 -> r1.2)
        if_mib.h (r1.1.1.2 -> r1.2)
        if_ppp.c (r1.2 -> r1.3)
        if_pppvar.h (r1.1.1.1 -> r1.2)
        if_sl.c (r1.1.1.1 -> r1.2)
        if_spppsubr.c (r1.2 -> r1.3)
        if_stf.c (r1.1.1.1 -> r1.2)
        if_tap.c (r1.1.1.2 -> r1.2)
        if_tap.h (r1.1.1.2 -> r1.2)
        if_tun.c (r1.1.1.1 -> r1.2)
        if_types.h (r1.1.1.1 -> r1.2)
        if_var.h (r1.1.1.1 -> r1.2)
        if_vlan.c (r1.1.1.2 -> r1.2)
        if_vlan_var.h (r1.1.1.2 -> r1.2)
        iso88025.h (r1.1.1.1 -> r1.2)
        netisr.c (r1.1.1.1 -> r1.2)
        pfil.c (r1.1.1.1 -> r1.2)
        pfil.h (r1.1.1.1 -> r1.2)
        pfkeyv2.h (r1.1.1.1 -> r1.2)
        ppp_deflate.c (r1.2 -> r1.3)
        ppp_tty.c (r1.1.1.1 -> r1.2)
        radix.c (r1.1.1.2 -> r1.2)
        raw_cb.c (r1.1.1.1 -> r1.2)
        raw_usrreq.c (r1.1.1.1 -> r1.2)
        route.c (r1.1.1.1 -> r1.2)
        route.h (r1.2 -> r1.3)
        rtsock.c (r1.2 -> r1.3)
        zlib.c (r1.1.1.1 -> r1.2)

Added Files:
-----------
    src/sys/net:
        bpf_jitter.c (r1.1)
        bpf_jitter.h (r1.1)
        bridgestp.h (r1.1)
        ieee8023ad_lacp.c (r1.1)
        ieee8023ad_lacp.h (r1.1)
        if_edsc.c (r1.1)
        if_enc.c (r1.1)
        if_lagg.c (r1.1)
        if_lagg.h (r1.1)

Removed Files:
-------------
    src/sys/net:
        bpf_compat.h
        bridge.c
        bridge.h
        net_osdep.h

-------------- next part --------------
Index: if_disc.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_disc.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_disc.c -L sys/net/if_disc.c -u -r1.1.1.1 -r1.2
--- sys/net/if_disc.c
+++ sys/net/if_disc.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	From: @(#)if_loop.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_disc.c,v 1.48.2.1 2005/11/25 14:41:31 glebius Exp $
+ * $FreeBSD: src/sys/net/if_disc.c,v 1.54 2007/03/26 09:10:28 yar Exp $
  */
 
 /*
@@ -62,25 +62,22 @@
 #define DISCNAME	"disc"
 
 struct disc_softc {
-	struct ifnet *sc_ifp;	/* must be first */
-	LIST_ENTRY(disc_softc) sc_list;
+	struct ifnet *sc_ifp;
 };
 
 static int	discoutput(struct ifnet *, struct mbuf *,
 		    struct sockaddr *, struct rtentry *);
 static void	discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int	discioctl(struct ifnet *, u_long, caddr_t);
-static int	disc_clone_create(struct if_clone *, int);
+static int	disc_clone_create(struct if_clone *, int, caddr_t);
 static void	disc_clone_destroy(struct ifnet *);
 
-static struct mtx disc_mtx;
 static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface");
-static LIST_HEAD(, disc_softc) disc_softc_list;
 
 IFC_SIMPLE_DECLARE(disc, 0);
 
 static int
-disc_clone_create(struct if_clone *ifc, int unit)
+disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct ifnet		*ifp;
 	struct disc_softc	*sc;
@@ -104,60 +101,34 @@
 	ifp->if_snd.ifq_maxlen = 20;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
-	mtx_lock(&disc_mtx);
-	LIST_INSERT_HEAD(&disc_softc_list, sc, sc_list);
-	mtx_unlock(&disc_mtx);
 
 	return (0);
 }
 
 static void
-disc_destroy(struct disc_softc *sc)
-{
-
-	bpfdetach(sc->sc_ifp);
-	if_detach(sc->sc_ifp);
-	if_free(sc->sc_ifp);
-
-	free(sc, M_DISC);
-}
-
-static void
 disc_clone_destroy(struct ifnet *ifp)
 {
 	struct disc_softc	*sc;
 
 	sc = ifp->if_softc;
-	mtx_lock(&disc_mtx);
-	LIST_REMOVE(sc, sc_list);
-	mtx_unlock(&disc_mtx);
 
-	disc_destroy(sc);
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+
+	free(sc, M_DISC);
 }
 
 static int
 disc_modevent(module_t mod, int type, void *data)
 {
-	struct disc_softc *sc;
 
 	switch (type) {
 	case MOD_LOAD:
-		mtx_init(&disc_mtx, "disc_mtx", NULL, MTX_DEF);
-		LIST_INIT(&disc_softc_list);
 		if_clone_attach(&disc_cloner);
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(&disc_cloner);
-
-		mtx_lock(&disc_mtx);
-		while ((sc = LIST_FIRST(&disc_softc_list)) != NULL) {
-			LIST_REMOVE(sc, sc_list);
-			mtx_unlock(&disc_mtx);
-			disc_destroy(sc);
-			mtx_lock(&disc_mtx);
-		}
-		mtx_unlock(&disc_mtx);
-		mtx_destroy(&disc_mtx);
 		break;
 	default:
 		return (EOPNOTSUPP);
@@ -187,7 +158,7 @@
 		dst->sa_family = af;
 	}
 
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		u_int af = dst->sa_family;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
Index: if_bridge.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/if_bridge.c -L sys/net/if_bridge.c -u -r1.2 -r1.3
--- sys/net/if_bridge.c
+++ sys/net/if_bridge.c
@@ -80,10 +80,11 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/net/if_bridge.c,v 1.11.2.28 2006/04/02 04:41:53 thompsa Exp $");
+__FBSDID("$FreeBSD: src/sys/net/if_bridge.c,v 1.103.2.3 2007/12/21 05:29:15 thompsa Exp $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
+#include "opt_carp.h"
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -96,9 +97,11 @@
 #include <sys/ctype.h>  /* string functions */
 #include <sys/kernel.h>
 #include <sys/random.h>
+#include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <vm/uma.h>
 #include <sys/module.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -120,10 +123,15 @@
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
 #include <machine/in_cksum.h>
 #include <netinet/if_ether.h> /* for struct arpcom */
+#include <net/bridgestp.h>
 #include <net/if_bridgevar.h>
 #include <net/if_llc.h>
+#include <net/if_vlan_var.h>
 
 #include <net/route.h>
 #include <netinet/ip_fw.h>
@@ -146,17 +154,6 @@
 #endif
 
 /*
- * Spanning tree defaults.
- */
-#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
-#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
-#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
-#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
-#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
-#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
-#define	BSTP_DEFAULT_PATH_COST		55
-
-/*
  * Timeout (in seconds) for entries learned dynamically.
  */
 #ifndef BRIDGE_RTABLE_TIMEOUT
@@ -175,6 +172,53 @@
  */
 #define	BRIDGE_IFCAPS_MASK		IFCAP_TXCSUM
 
+/*
+ * Bridge interface list entry.
+ */
+struct bridge_iflist {
+	LIST_ENTRY(bridge_iflist) bif_next;
+	struct ifnet		*bif_ifp;	/* member if */
+	struct bstp_port	bif_stp;	/* STP state */
+	uint32_t		bif_flags;	/* member if flags */
+	int			bif_mutecap;	/* member muted caps */
+};
+
+/*
+ * Bridge route node.
+ */
+struct bridge_rtnode {
+	LIST_ENTRY(bridge_rtnode) brt_hash;	/* hash table linkage */
+	LIST_ENTRY(bridge_rtnode) brt_list;	/* list linkage */
+	struct ifnet		*brt_ifp;	/* destination if */
+	unsigned long		brt_expire;	/* expiration time */
+	uint8_t			brt_flags;	/* address flags */
+	uint8_t			brt_addr[ETHER_ADDR_LEN];
+	uint16_t		brt_vlan;	/* vlan id */
+};
+
+/*
+ * Software state for each bridge.
+ */
+struct bridge_softc {
+	struct ifnet		*sc_ifp;	/* make this an interface */
+	LIST_ENTRY(bridge_softc) sc_list;
+	struct mtx		sc_mtx;
+	struct cv		sc_cv;
+	uint32_t		sc_brtmax;	/* max # of addresses */
+	uint32_t		sc_brtcnt;	/* cur. # of addresses */
+	uint32_t		sc_brttimeout;	/* rt timeout in seconds */
+	struct callout		sc_brcallout;	/* bridge callout */
+	uint32_t		sc_iflist_ref;	/* refcount for sc_iflist */
+	uint32_t		sc_iflist_xcnt;	/* refcount for sc_iflist */
+	LIST_HEAD(, bridge_iflist) sc_iflist;	/* member interface list */
+	LIST_HEAD(, bridge_rtnode) *sc_rthash;	/* our forwarding table */
+	LIST_HEAD(, bridge_rtnode) sc_rtlist;	/* list version of above */
+	uint32_t		sc_rthash_key;	/* key for hash */
+	LIST_HEAD(, bridge_iflist) sc_spanlist;	/* span ports list */
+	struct bstp_state	sc_stp;		/* STP state */
+	uint32_t		sc_brtexceeded;	/* # of cache drops */
+};
+
 static struct mtx 	bridge_list_mtx;
 eventhandler_tag	bridge_detach_cookie = NULL;
 
@@ -182,7 +226,7 @@
 
 uma_zone_t bridge_rtnode_zone;
 
-static int	bridge_clone_create(struct if_clone *, int);
+static int	bridge_clone_create(struct if_clone *, int, caddr_t);
 static void	bridge_clone_destroy(struct ifnet *);
 
 static int	bridge_ioctl(struct ifnet *, u_long, caddr_t);
@@ -195,8 +239,12 @@
 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
 static int	bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *);
+static void	bridge_enqueue(struct bridge_softc *, struct ifnet *,
+		    struct mbuf *);
+static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
 
-static void	bridge_forward(struct bridge_softc *, struct mbuf *m);
+static void	bridge_forward(struct bridge_softc *, struct bridge_iflist *,
+		    struct mbuf *m);
 
 static void	bridge_timer(void *);
 
@@ -205,23 +253,27 @@
 static void	bridge_span(struct bridge_softc *, struct mbuf *);
 
 static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
-		    struct ifnet *, int, uint8_t);
-static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
+		    uint16_t, struct bridge_iflist *, int, uint8_t);
+static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
+		    uint16_t);
 static void	bridge_rttrim(struct bridge_softc *);
 static void	bridge_rtage(struct bridge_softc *);
 static void	bridge_rtflush(struct bridge_softc *, int);
-static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
+static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
+		    uint16_t);
 
 static int	bridge_rtable_init(struct bridge_softc *);
 static void	bridge_rtable_fini(struct bridge_softc *);
 
 static int	bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
-		    const uint8_t *);
+		    const uint8_t *, uint16_t);
 static int	bridge_rtnode_insert(struct bridge_softc *,
 		    struct bridge_rtnode *);
 static void	bridge_rtnode_destroy(struct bridge_softc *,
 		    struct bridge_rtnode *);
+static void	bridge_rtable_expire(struct ifnet *, int);
+static void	bridge_state_change(struct ifnet *, int);
 
 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
 		    const char *name);
@@ -257,12 +309,28 @@
 static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
 static int	bridge_ioctl_addspan(struct bridge_softc *, void *);
 static int	bridge_ioctl_delspan(struct bridge_softc *, void *);
+static int	bridge_ioctl_gbparam(struct bridge_softc *, void *);
+static int	bridge_ioctl_grte(struct bridge_softc *, void *);
+static int	bridge_ioctl_gifsstp(struct bridge_softc *, void *);
+static int	bridge_ioctl_sproto(struct bridge_softc *, void *);
+static int	bridge_ioctl_stxhc(struct bridge_softc *, void *);
 static int	bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
 		    int);
 static int	bridge_ip_checkbasic(struct mbuf **mp);
-# ifdef INET6
+#ifdef INET6
 static int	bridge_ip6_checkbasic(struct mbuf **mp);
-# endif /* INET6 */
+#endif /* INET6 */
+static int	bridge_fragment(struct ifnet *, struct mbuf *,
+		    struct ether_header *, int, struct llc *);
+
+/* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
+#define	VLANTAGOF(_m)	\
+    (_m->m_flags & M_VLANTAG) ? EVL_VLANOFTAG(_m->m_pkthdr.ether_vtag) : 1
+
+static struct bstp_cb_ops bridge_ops = {
+	.bcb_state = bridge_state_change,
+	.bcb_rtage = bridge_rtable_expire
+};
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
@@ -271,12 +339,23 @@
 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
 static int pfil_member = 1; /* run pfil hooks on the member interface */
 static int pfil_ipfw = 0;   /* layer2 filter with ipfw */
+static int pfil_ipfw_arp = 0;   /* layer2 filter with ipfw */
+static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
+                                   locally destined packets */
+static int log_stp   = 0;   /* log STP state changes */
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
+    &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
     &pfil_bridge, 0, "Packet filter on the bridge interface");
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
     &pfil_member, 0, "Packet filter on the member interface");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
+    &pfil_local_phys, 0,
+    "Packet filter on the physical interface for locally destined packets");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
+    &log_stp, 0, "Log STP state changes");
 
 struct bridge_control {
 	int	(*bc_func)(struct bridge_softc *, void *);
@@ -353,13 +432,25 @@
 	  BC_F_COPYIN|BC_F_SUSER },
 	{ bridge_ioctl_delspan,		sizeof(struct ifbreq),
 	  BC_F_COPYIN|BC_F_SUSER },
+
+	{ bridge_ioctl_gbparam,		sizeof(struct ifbropreq),
+	  BC_F_COPYOUT },
+
+	{ bridge_ioctl_grte,		sizeof(struct ifbrparam),
+	  BC_F_COPYOUT },
+
+	{ bridge_ioctl_gifsstp,		sizeof(struct ifbpstpconf),
+	  BC_F_COPYIN|BC_F_COPYOUT },
+
+	{ bridge_ioctl_sproto,		sizeof(struct ifbrparam),
+	  BC_F_COPYIN|BC_F_SUSER },
+
+	{ bridge_ioctl_stxhc,		sizeof(struct ifbrparam),
+	  BC_F_COPYIN|BC_F_SUSER },
 };
 const int bridge_control_table_size =
     sizeof(bridge_control_table) / sizeof(bridge_control_table[0]);
 
-static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
-			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-
 LIST_HEAD(, bridge_softc) bridge_list;
 
 IFC_SIMPLE_DECLARE(bridge, 0);
@@ -379,7 +470,6 @@
 		bridge_input_p = bridge_input;
 		bridge_output_p = bridge_output;
 		bridge_dn_p = bridge_dummynet;
-		bstp_linkstate_p = bstp_linkstate;
 		bridge_detach_cookie = EVENTHANDLER_REGISTER(
 		    ifnet_departure_event, bridge_ifdetach, NULL,
 		    EVENTHANDLER_PRI_ANY);
@@ -388,19 +478,16 @@
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    bridge_detach_cookie);
 		if_clone_detach(&bridge_cloner);
-		while (!LIST_EMPTY(&bridge_list))
-			bridge_clone_destroy(LIST_FIRST(&bridge_list)->sc_ifp);
 		uma_zdestroy(bridge_rtnode_zone);
 		bridge_input_p = NULL;
 		bridge_output_p = NULL;
 		bridge_dn_p = NULL;
-		bstp_linkstate_p = NULL;
 		mtx_destroy(&bridge_list_mtx);
 		break;
 	default:
-		return EOPNOTSUPP;
+		return (EOPNOTSUPP);
 	}
-	return 0;
+	return (0);
 }
 
 static moduledata_t bridge_mod = {
@@ -410,6 +497,7 @@
 };
 
 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
 
 /*
  * handler for net.link.bridge.pfil_ipfw
@@ -439,7 +527,7 @@
 		}
 	}
 
-	return error;
+	return (error);
 }
 SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW,
 	    &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW");
@@ -450,60 +538,66 @@
  *	Create a new bridge instance.
  */
 static int
-bridge_clone_create(struct if_clone *ifc, int unit)
+bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
-	struct bridge_softc *sc;
-	struct ifnet *ifp;
+	struct bridge_softc *sc, *sc2;
+	struct ifnet *bifp, *ifp;
 	u_char eaddr[6];
+	int retry;
 
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
-	BRIDGE_LOCK_INIT(sc);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		free(sc, M_DEVBUF);
 		return (ENOSPC);
 	}
 
+	BRIDGE_LOCK_INIT(sc);
 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
-	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
-	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
-	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
-	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
-	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
 
 	/* Initialize our routing table. */
 	bridge_rtable_init(sc);
 
 	callout_init_mtx(&sc->sc_brcallout, &sc->sc_mtx, 0);
-	callout_init_mtx(&sc->sc_bstpcallout, &sc->sc_mtx, 0);
 
 	LIST_INIT(&sc->sc_iflist);
 	LIST_INIT(&sc->sc_spanlist);
 
 	ifp->if_softc = sc;
 	if_initname(ifp, ifc->ifc_name, unit);
-	ifp->if_mtu = ETHERMTU;
-	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = bridge_ioctl;
-	ifp->if_output = bridge_output;
 	ifp->if_start = bridge_start;
 	ifp->if_init = bridge_init;
 	ifp->if_type = IFT_BRIDGE;
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
 	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
 	IFQ_SET_READY(&ifp->if_snd);
-	ifp->if_hdrlen = ETHER_HDR_LEN;
 
 	/*
-	 * Generate a random ethernet address and use the private AC:DE:48
-	 * OUI code.
-	 */
-	arc4rand(eaddr, ETHER_ADDR_LEN, 1);
-	eaddr[0] = 0xAC;
-	eaddr[1] = 0xDE;
-	eaddr[2] = 0x48;
+	 * Generate a random ethernet address with a locally administered
+	 * address.
+	 *
+	 * Since we are using random ethernet addresses for the bridge, it is
+	 * possible that we might have address collisions, so make sure that
+	 * this hardware address isn't already in use on another bridge.
+	 */
+	for (retry = 1; retry != 0;) {
+		arc4rand(eaddr, ETHER_ADDR_LEN, 1);
+		eaddr[0] &= ~1;		/* clear multicast bit */
+		eaddr[0] |= 2;		/* set the LAA bit */
+		retry = 0;
+		mtx_lock(&bridge_list_mtx);
+		LIST_FOREACH(sc2, &bridge_list, sc_list) {
+			bifp = sc2->sc_ifp;
+			if (memcmp(eaddr, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0)
+				retry = 1;
+		}
+		mtx_unlock(&bridge_list_mtx);
+	}
 
+	bstp_attach(&sc->sc_stp, &bridge_ops);
 	ether_ifattach(ifp, eaddr);
 	/* Now undo some of the damage... */
 	ifp->if_baudrate = 0;
@@ -542,12 +636,12 @@
 	BRIDGE_UNLOCK(sc);
 
 	callout_drain(&sc->sc_brcallout);
-	callout_drain(&sc->sc_bstpcallout);
 
 	mtx_lock(&bridge_list_mtx);
 	LIST_REMOVE(sc, sc_list);
 	mtx_unlock(&bridge_list_mtx);
 
+	bstp_detach(&sc->sc_stp);
 	ether_ifdetach(ifp);
 	if_free_type(ifp, IFT_ETHER);
 
@@ -574,13 +668,12 @@
 		struct ifbareq ifbareq;
 		struct ifbaconf ifbaconf;
 		struct ifbrparam ifbrparam;
+		struct ifbropreq ifbropreq;
 	} args;
 	struct ifdrv *ifd = (struct ifdrv *) data;
 	const struct bridge_control *bc;
 	int error = 0;
 
-	BRIDGE_LOCK(sc);
-
 	switch (cmd) {
 
 	case SIOCADDMULTI:
@@ -607,7 +700,7 @@
 		}
 
 		if (bc->bc_flags & BC_F_SUSER) {
-			error = suser(td);
+			error = priv_check(td, PRIV_NET_BRIDGE);
 			if (error)
 				break;
 		}
@@ -618,14 +711,16 @@
 			break;
 		}
 
-		bzero(&args, sizeof args);
+		bzero(&args, sizeof(args));
 		if (bc->bc_flags & BC_F_COPYIN) {
 			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
 			if (error)
 				break;
 		}
 
+		BRIDGE_LOCK(sc);
 		error = (*bc->bc_func)(sc, &args);
+		BRIDGE_UNLOCK(sc);
 		if (error)
 			break;
 
@@ -641,14 +736,15 @@
 			 * If interface is marked down and it is running,
 			 * then stop and disable it.
 			 */
+			BRIDGE_LOCK(sc);
 			bridge_stop(ifp, 1);
+			BRIDGE_UNLOCK(sc);
 		} else if ((ifp->if_flags & IFF_UP) &&
 		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			/*
 			 * If interface is marked up and it is stopped, then
 			 * start it.
 			 */
-			BRIDGE_UNLOCK(sc);
 			(*ifp->if_init)(sc);
 		}
 		break;
@@ -663,14 +759,10 @@
 		 * drop the lock as ether_ioctl() will call bridge_start() and
 		 * cause the lock to be recursed.
 		 */
-		BRIDGE_UNLOCK(sc);
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
 
-	if (BRIDGE_LOCKED(sc))
-		BRIDGE_UNLOCK(sc);
-
 	return (error);
 }
 
@@ -689,7 +781,7 @@
 	if (ifp->if_ioctl == NULL)
 		return;
 
-	bzero(&ifr, sizeof ifr);
+	bzero(&ifr, sizeof(ifr));
 	ifr.ifr_reqcap = ifp->if_capenable;
 
 	if (mute) {
@@ -708,7 +800,6 @@
 		IFF_UNLOCKGIANT(ifp);
 	}
 }
-	
 
 /*
  * bridge_lookup_member:
@@ -787,6 +878,9 @@
 		}
 	}
 
+	if (bif->bif_flags & IFBIF_STP)
+		bstp_disable(&bif->bif_stp);
+
 	ifs->if_bridge = NULL;
 	BRIDGE_XLOCK(sc);
 	LIST_REMOVE(bif, bif_next);
@@ -794,10 +888,10 @@
 
 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
 
+	BRIDGE_UNLOCK(sc);
+	bstp_destroy(&bif->bif_stp);	/* prepare to free */
+	BRIDGE_LOCK(sc);
 	free(bif, M_DEVBUF);
-
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
 }
 
 /*
@@ -825,8 +919,6 @@
 	struct ifnet *ifs;
 	int error = 0;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	ifs = ifunit(req->ifbr_ifsname);
 	if (ifs == NULL)
 		return (ENOENT);
@@ -859,8 +951,6 @@
 
 	bif->bif_ifp = ifs;
 	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
-	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
-	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
 
 	switch (ifs->if_type) {
 	case IFT_ETHER:
@@ -884,6 +974,7 @@
 	}
 
 	ifs->if_bridge = sc;
+	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
 	/*
 	 * XXX: XLOCK HERE!?!
 	 *
@@ -891,11 +982,6 @@
 	 */
 	LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
 
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
-	else
-		bstp_stop(sc);
-
 out:
 	if (error) {
 		if (bif != NULL)
@@ -910,8 +996,6 @@
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (bif == NULL)
 		return (ENOENT);
@@ -926,19 +1010,35 @@
 {
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif;
-
-	BRIDGE_LOCK_ASSERT(sc);
+	struct bstp_port *bp;
 
 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (bif == NULL)
 		return (ENOENT);
 
+	bp = &bif->bif_stp;
 	req->ifbr_ifsflags = bif->bif_flags;
-	req->ifbr_state = bif->bif_state;
-	req->ifbr_priority = bif->bif_priority;
-	req->ifbr_path_cost = bif->bif_path_cost;
-	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
-
+	req->ifbr_state = bp->bp_state;
+	req->ifbr_priority = bp->bp_priority;
+	req->ifbr_path_cost = bp->bp_path_cost;
+	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
+	req->ifbr_proto = bp->bp_protover;
+	req->ifbr_role = bp->bp_role;
+	req->ifbr_stpflags = bp->bp_flags;
+
+	/* Copy STP state options as flags */
+	if (bp->bp_operedge)
+		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
+	if (bp->bp_flags & BSTP_PORT_AUTOEDGE)
+		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
+	if (bp->bp_ptp_link)
+		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
+	if (bp->bp_flags & BSTP_PORT_AUTOPTP)
+		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
+	if (bp->bp_flags & BSTP_PORT_ADMEDGE)
+		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
+	if (bp->bp_flags & BSTP_PORT_ADMCOST)
+		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
 	return (0);
 }
 
@@ -947,33 +1047,37 @@
 {
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif;
-
-	BRIDGE_LOCK_ASSERT(sc);
+	struct bstp_port *bp;
+	int error;
 
 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (bif == NULL)
 		return (ENOENT);
+	bp = &bif->bif_stp;
 
 	if (req->ifbr_ifsflags & IFBIF_SPAN)
 		/* SPAN is readonly */
 		return (EINVAL);
 
 	if (req->ifbr_ifsflags & IFBIF_STP) {
-		switch (bif->bif_ifp->if_type) {
-		case IFT_ETHER:
-			/* These can do spanning tree. */
-			break;
-
-		default:
-			/* Nothing else can. */
-			return (EINVAL);
+		if ((bif->bif_flags & IFBIF_STP) == 0) {
+			error = bstp_enable(&bif->bif_stp);
+			if (error)
+				return (error);
 		}
+	} else {
+		if ((bif->bif_flags & IFBIF_STP) != 0)
+			bstp_disable(&bif->bif_stp);
 	}
 
-	bif->bif_flags = req->ifbr_ifsflags;
+	/* Pass on STP flags */
+	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
+	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
+	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
+	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
 
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
+	/* Save the bits relating to the bridge */
+	bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK;
 
 	return (0);
 }
@@ -983,8 +1087,6 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	sc->sc_brtmax = param->ifbrp_csize;
 	bridge_rttrim(sc);
 
@@ -996,8 +1098,6 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	param->ifbrp_csize = sc->sc_brtmax;
 
 	return (0);
@@ -1009,9 +1109,8 @@
 	struct ifbifconf *bifc = arg;
 	struct bridge_iflist *bif;
 	struct ifbreq breq;
-	int count, len, error = 0;
-
-	BRIDGE_LOCK_ASSERT(sc);
+	char *buf, *outbuf;
+	int count, buflen, len, error = 0;
 
 	count = 0;
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
@@ -1019,29 +1118,32 @@
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
 		count++;
 
+	buflen = sizeof(breq) * count;
 	if (bifc->ifbic_len == 0) {
-		bifc->ifbic_len = sizeof(breq) * count;
+		bifc->ifbic_len = buflen;
 		return (0);
 	}
+	BRIDGE_UNLOCK(sc);
+	outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+	BRIDGE_LOCK(sc);
 
 	count = 0;
-	len = bifc->ifbic_len;
-	bzero(&breq, sizeof breq);
+	buf = outbuf;
+	len = min(bifc->ifbic_len, buflen);
+	bzero(&breq, sizeof(breq));
 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		if (len < sizeof(breq))
 			break;
 
 		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
 		    sizeof(breq.ifbr_ifsname));
-		breq.ifbr_ifsflags = bif->bif_flags;
-		breq.ifbr_state = bif->bif_state;
-		breq.ifbr_priority = bif->bif_priority;
-		breq.ifbr_path_cost = bif->bif_path_cost;
-		breq.ifbr_portno = bif->bif_ifp->if_index & 0xff;
-		error = copyout(&breq, bifc->ifbic_req + count, sizeof(breq));
+		/* Fill in the ifbreq structure */
+		error = bridge_ioctl_gifflags(sc, &breq);
 		if (error)
 			break;
+		memcpy(buf, &breq, sizeof(breq));
 		count++;
+		buf += sizeof(breq);
 		len -= sizeof(breq);
 	}
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
@@ -1051,18 +1153,18 @@
 		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
 		    sizeof(breq.ifbr_ifsname));
 		breq.ifbr_ifsflags = bif->bif_flags;
-		breq.ifbr_state = bif->bif_state;
-		breq.ifbr_priority = bif->bif_priority;
-		breq.ifbr_path_cost = bif->bif_path_cost;
-		breq.ifbr_portno = bif->bif_ifp->if_index & 0xff;
-		error = copyout(&breq, bifc->ifbic_req + count, sizeof(breq));
-		if (error)
-			break;
+		breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff;
+		memcpy(buf, &breq, sizeof(breq));
 		count++;
+		buf += sizeof(breq);
 		len -= sizeof(breq);
 	}
 
+	BRIDGE_UNLOCK(sc);
 	bifc->ifbic_len = sizeof(breq) * count;
+	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);
+	BRIDGE_LOCK(sc);
+	free(outbuf, M_TEMP);
 	return (error);
 }
 
@@ -1072,21 +1174,32 @@
 	struct ifbaconf *bac = arg;
 	struct bridge_rtnode *brt;
 	struct ifbareq bareq;
-	int count = 0, error = 0, len;
-
-	BRIDGE_LOCK_ASSERT(sc);
+	char *buf, *outbuf;
+	int count, buflen, len, error = 0;
 
 	if (bac->ifbac_len == 0)
 		return (0);
 
-	len = bac->ifbac_len;
-	bzero(&bareq, sizeof bareq);
+	count = 0;
+	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)
+		count++;
+	buflen = sizeof(bareq) * count;
+
+	BRIDGE_UNLOCK(sc);
+	outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+	BRIDGE_LOCK(sc);
+
+	count = 0;
+	buf = outbuf;
+	len = min(bac->ifbac_len, buflen);
+	bzero(&bareq, sizeof(bareq));
 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
 		if (len < sizeof(bareq))
 			goto out;
 		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
 		    sizeof(bareq.ifba_ifsname));
 		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
+		bareq.ifba_vlan = brt->brt_vlan;
 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
 				time_uptime < brt->brt_expire)
 			bareq.ifba_expire = brt->brt_expire - time_uptime;
@@ -1094,14 +1207,17 @@
 			bareq.ifba_expire = 0;
 		bareq.ifba_flags = brt->brt_flags;
 
-		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
-		if (error)
-			goto out;
+		memcpy(buf, &bareq, sizeof(bareq));
 		count++;
+		buf += sizeof(bareq);
 		len -= sizeof(bareq);
 	}
 out:
+	BRIDGE_UNLOCK(sc);
 	bac->ifbac_len = sizeof(bareq) * count;
+	error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
+	BRIDGE_LOCK(sc);
+	free(outbuf, M_TEMP);
 	return (error);
 }
 
@@ -1112,13 +1228,11 @@
 	struct bridge_iflist *bif;
 	int error;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
 	if (bif == NULL)
 		return (ENOENT);
 
-	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
+	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
 	    req->ifba_flags);
 
 	return (error);
@@ -1129,10 +1243,7 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	sc->sc_brttimeout = param->ifbrp_ctime;
-
 	return (0);
 }
 
@@ -1141,10 +1252,7 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	param->ifbrp_ctime = sc->sc_brttimeout;
-
 	return (0);
 }
 
@@ -1153,9 +1261,7 @@
 {
 	struct ifbareq *req = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	return (bridge_rtdaddr(sc, req->ifba_dst));
+	return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan));
 }
 
 static int
@@ -1163,10 +1269,7 @@
 {
 	struct ifbreq *req = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	bridge_rtflush(sc, req->ifbr_ifsflags);
-
 	return (0);
 }
 
@@ -1174,11 +1277,9 @@
 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	param->ifbrp_prio = sc->sc_bridge_priority;
-
+	param->ifbrp_prio = bs->bs_bridge_priority;
 	return (0);
 }
 
@@ -1187,25 +1288,16 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	sc->sc_bridge_priority = param->ifbrp_prio;
-
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
-
-	return (0);
+	return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio));
 }
 
 static int
 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
-
+	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
 	return (0);
 }
 
@@ -1214,27 +1306,16 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	if (param->ifbrp_hellotime == 0)
-		return (EINVAL);
-	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
-
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
-
-	return (0);
+	return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime));
 }
 
 static int
 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
-
+	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
 	return (0);
 }
 
@@ -1243,27 +1324,16 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	if (param->ifbrp_fwddelay == 0)
-		return (EINVAL);
-	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
-
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
-
-	return (0);
+	return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay));
 }
 
 static int
 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
 {
 	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
-
+	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
 	return (0);
 }
 
@@ -1272,16 +1342,7 @@
 {
 	struct ifbrparam *param = arg;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	if (param->ifbrp_maxage == 0)
-		return (EINVAL);
-	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
-
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
-
-	return (0);
+	return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage));
 }
 
 static int
@@ -1290,18 +1351,11 @@
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (bif == NULL)
 		return (ENOENT);
 
-	bif->bif_priority = req->ifbr_priority;
-
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
-
-	return (0);
+	return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority));
 }
 
 static int
@@ -1310,18 +1364,11 @@
 	struct ifbreq *req = arg;
 	struct bridge_iflist *bif;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
 	if (bif == NULL)
 		return (ENOENT);
 
-	bif->bif_path_cost = req->ifbr_path_cost;
-
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		bstp_initialization(sc);
-
-	return (0);
+	return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost));
 }
 
 static int
@@ -1331,8 +1378,6 @@
 	struct bridge_iflist *bif = NULL;
 	struct ifnet *ifs;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	ifs = ifunit(req->ifbr_ifsname);
 	if (ifs == NULL)
 		return (ENOENT);
@@ -1346,6 +1391,7 @@
 
 	switch (ifs->if_type) {
 		case IFT_ETHER:
+		case IFT_GIF:
 		case IFT_L2VLAN:
 			break;
 		default:
@@ -1371,8 +1417,6 @@
 	struct bridge_iflist *bif;
 	struct ifnet *ifs;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
 	ifs = ifunit(req->ifbr_ifsname);
 	if (ifs == NULL)
 		return (ENOENT);
@@ -1389,6 +1433,120 @@
 	return (0);
 }
 
+static int
+bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg)
+{
+	struct ifbropreq *req = arg;
+	struct bstp_state *bs = &sc->sc_stp;
+	struct bstp_port *root_port;
+
+	req->ifbop_maxage = bs->bs_bridge_max_age >> 8;
+	req->ifbop_hellotime = bs->bs_bridge_htime >> 8;
+	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;
+
+	root_port = bs->bs_root_port;
+	if (root_port == NULL)
+		req->ifbop_root_port = 0;
+	else
+		req->ifbop_root_port = root_port->bp_ifp->if_index;
+
+	req->ifbop_holdcount = bs->bs_txholdcount;
+	req->ifbop_priority = bs->bs_bridge_priority;
+	req->ifbop_protocol = bs->bs_protover;
+	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;
+	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;
+	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;
+	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;
+	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;
+	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;
+
+	return (0);
+}
+
+static int
+bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	param->ifbrp_cexceeded = sc->sc_brtexceeded;
+	return (0);
+}
+
+static int
+bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg)
+{
+	struct ifbpstpconf *bifstp = arg;
+	struct bridge_iflist *bif;
+	struct bstp_port *bp;
+	struct ifbpstpreq bpreq;
+	char *buf, *outbuf;
+	int count, buflen, len, error = 0;
+
+	count = 0;
+	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		if ((bif->bif_flags & IFBIF_STP) != 0)
+			count++;
+	}
+
+	buflen = sizeof(bpreq) * count;
+	if (bifstp->ifbpstp_len == 0) {
+		bifstp->ifbpstp_len = buflen;
+		return (0);
+	}
+
+	BRIDGE_UNLOCK(sc);
+	outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+	BRIDGE_LOCK(sc);
+
+	count = 0;
+	buf = outbuf;
+	len = min(bifstp->ifbpstp_len, buflen);
+	bzero(&bpreq, sizeof(bpreq));
+	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		if (len < sizeof(bpreq))
+			break;
+
+		if ((bif->bif_flags & IFBIF_STP) == 0)
+			continue;
+
+		bp = &bif->bif_stp;
+		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;
+		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;
+		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;
+		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;
+		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id;
+		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;
+
+		memcpy(buf, &bpreq, sizeof(bpreq));
+		count++;
+		buf += sizeof(bpreq);
+		len -= sizeof(bpreq);
+	}
+
+	BRIDGE_UNLOCK(sc);
+	bifstp->ifbpstp_len = sizeof(bpreq) * count;
+	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);
+	BRIDGE_LOCK(sc);
+	free(outbuf, M_TEMP);
+	return (error);
+}
+
+static int
+bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto));
+}
+
+static int
+bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc));
+}
+
 /*
  * bridge_ifdetach:
  *
@@ -1447,9 +1605,9 @@
 	    bridge_timer, sc);
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
-	bstp_initialization(sc);
+	bstp_init(&sc->sc_stp);		/* Initialize Spanning Tree */
+
 	BRIDGE_UNLOCK(sc);
-	return;
 }
 
 /*
@@ -1468,7 +1626,7 @@
 		return;
 
 	callout_stop(&sc->sc_brcallout);
-	bstp_stop(sc);
+	bstp_stop(&sc->sc_stp);
 
 	bridge_rtflush(sc, IFBF_FLUSHDYN);
 
@@ -1481,16 +1639,41 @@
  *	Enqueue a packet on a bridge member interface.
  *
  */
-__inline void
+static void
 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
 {
-	int len, err;
+	int len, err = 0;
 	short mflags;
+	struct mbuf *m0;
 
 	len = m->m_pkthdr.len;
 	mflags = m->m_flags;
 
-	IFQ_ENQUEUE(&dst_ifp->if_snd, m, err);
+	/* We may be sending a fragment so traverse the mbuf */
+	for (; m; m = m0) {
+		m0 = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		/*
+		 * If underlying interface can not do VLAN tag insertion itself
+		 * then attach a packet tag that holds it.
+		 */
+		if ((m->m_flags & M_VLANTAG) &&
+		    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
+			m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
+			if (m == NULL) {
+				if_printf(dst_ifp,
+				    "unable to prepend VLAN header\n");
+				dst_ifp->if_oerrors++;
+				continue;
+			}
+			m->m_flags &= ~M_VLANTAG;
+		}
+
+		if (err == 0)
+			IFQ_ENQUEUE(&dst_ifp->if_snd, m, err);
+	}
+
 	if (err == 0) {
 
 		sc->sc_ifp->if_opackets++;
@@ -1533,9 +1716,9 @@
 		return;
 	}
 
-	if (inet_pfil_hook.ph_busy_count >= 0
+	if (PFIL_HOOKED(&inet_pfil_hook)
 #ifdef INET6
-	    || inet6_pfil_hook.ph_busy_count >= 0
+	    || PFIL_HOOKED(&inet6_pfil_hook)
 #endif
 	    ) {
 		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
@@ -1564,6 +1747,7 @@
 	struct ether_header *eh;
 	struct ifnet *dst_if;
 	struct bridge_softc *sc;
+	uint16_t vlan;
 
 	if (m->m_len < ETHER_HDR_LEN) {
 		m = m_pullup(m, ETHER_HDR_LEN);
@@ -1573,6 +1757,7 @@
 
 	eh = mtod(m, struct ether_header *);
 	sc = ifp->if_bridge;
+	vlan = VLANTAGOF(m);
 
 	BRIDGE_LOCK(sc);
 
@@ -1593,20 +1778,20 @@
 	if (ETHER_IS_MULTICAST(eh->ether_dhost))
 		dst_if = NULL;
 	else
-		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
 	if (dst_if == NULL) {
 		struct bridge_iflist *bif;
 		struct mbuf *mc;
 		int error = 0, used = 0;
 
+		bridge_span(sc, m);
+
 		BRIDGE_LOCK2REF(sc, error);
 		if (error) {
 			m_freem(m);
 			return (0);
 		}
 
-		bridge_span(sc, m);
-
 		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 			dst_if = bif->bif_ifp;
 
@@ -1621,15 +1806,9 @@
 			 * tree, make sure the port is in a state that
 			 * allows forwarding.
 			 */
-			if (dst_if != ifp &&
-			    (bif->bif_flags & IFBIF_STP) != 0) {
-				switch (bif->bif_state) {
-				case BSTP_IFSTATE_BLOCKING:
-				case BSTP_IFSTATE_LISTENING:
-				case BSTP_IFSTATE_DISABLED:
-					continue;
-				}
-			}
+			if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) &&
+			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+				continue;
 
 			if (LIST_NEXT(bif, bif_next) == NULL) {
 				used = 1;
@@ -1688,14 +1867,14 @@
 		IFQ_DEQUEUE(&ifp->if_snd, m);
 		if (m == 0)
 			break;
-		BPF_MTAP(ifp, m);
+		ETHER_BPF_MTAP(ifp, m);
 
 		eh = mtod(m, struct ether_header *);
 		dst_if = NULL;
 
 		BRIDGE_LOCK(sc);
 		if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
-			dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+			dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
 		}
 
 		if (dst_if == NULL)
@@ -1706,8 +1885,6 @@
 		}
 	}
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-
-	return;
 }
 
 /*
@@ -1718,41 +1895,28 @@
  *	NOTE: Releases the lock on return.
  */
 static void
-bridge_forward(struct bridge_softc *sc, struct mbuf *m)
+bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
+    struct mbuf *m)
 {
-	struct bridge_iflist *bif;
+	struct bridge_iflist *dbif;
 	struct ifnet *src_if, *dst_if, *ifp;
 	struct ether_header *eh;
+	uint16_t vlan;
 
 	src_if = m->m_pkthdr.rcvif;
-	BRIDGE_LOCK_ASSERT(sc);
 	ifp = sc->sc_ifp;
 
 	sc->sc_ifp->if_ipackets++;
 	sc->sc_ifp->if_ibytes += m->m_pkthdr.len;
+	vlan = VLANTAGOF(m);
 
-	/*
-	 * Look up the bridge_iflist.
-	 */
-	bif = bridge_lookup_member_if(sc, src_if);
-	if (bif == NULL) {
-		/* Interface is not a bridge member (anymore?) */
+	if ((sbif->bif_flags & IFBIF_STP) &&
+	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
 		BRIDGE_UNLOCK(sc);
 		m_freem(m);
 		return;
 	}
 
-	if (bif->bif_flags & IFBIF_STP) {
-		switch (bif->bif_state) {
-		case BSTP_IFSTATE_BLOCKING:
-		case BSTP_IFSTATE_LISTENING:
-		case BSTP_IFSTATE_DISABLED:
-			BRIDGE_UNLOCK(sc);
-			m_freem(m);
-			return;
-		}
-	}
-
 	eh = mtod(m, struct ether_header *);
 
 	/*
@@ -1760,7 +1924,7 @@
 	 * address is valid and not multicast, record
 	 * the address.
 	 */
-	if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+	if ((sbif->bif_flags & IFBIF_LEARNING) != 0 &&
 	    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
 	    (eh->ether_shost[0] == 0 &&
 	     eh->ether_shost[1] == 0 &&
@@ -1768,12 +1932,12 @@
 	     eh->ether_shost[3] == 0 &&
 	     eh->ether_shost[4] == 0 &&
 	     eh->ether_shost[5] == 0) == 0) {
-		(void) bridge_rtupdate(sc, eh->ether_shost,
-		    src_if, 0, IFBAF_DYNAMIC);
+		(void) bridge_rtupdate(sc, eh->ether_shost, vlan,
+		    sbif, 0, IFBAF_DYNAMIC);
 	}
 
-	if ((bif->bif_flags & IFBIF_STP) != 0 &&
-	    bif->bif_state == BSTP_IFSTATE_LEARNING) {
+	if ((sbif->bif_flags & IFBIF_STP) != 0 &&
+	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
 		m_freem(m);
 		BRIDGE_UNLOCK(sc);
 		return;
@@ -1789,7 +1953,7 @@
 	 * "this" side of the bridge, drop it.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
-		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
 		if (src_if == dst_if) {
 			BRIDGE_UNLOCK(sc);
 			m_freem(m);
@@ -1801,10 +1965,22 @@
 		dst_if = NULL;
 	}
 
+	/*
+	 * If we have a destination interface which is a member of our bridge,
+	 * OR this is a unicast packet, push it through the bpf(4) machinery.
+	 * For broadcast or multicast packets, don't bother because it will
+	 * be reinjected into ether_input. We do this before we pass the packets
+	 * through the pfil(9) framework, as it is possible that pfil(9) will
+	 * drop the packet, or possibly modify it, making it difficult to debug
+	 * firewall issues on the bridge.
+	 */
+	if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0)
+		ETHER_BPF_MTAP(ifp, m);
+
 	/* run the packet filter */
-	if (inet_pfil_hook.ph_busy_count >= 0
+	if (PFIL_HOOKED(&inet_pfil_hook)
 #ifdef INET6
-	    || inet6_pfil_hook.ph_busy_count >= 0
+	    || PFIL_HOOKED(&inet6_pfil_hook)
 #endif
 	    ) {
 		BRIDGE_UNLOCK(sc);
@@ -1816,13 +1992,6 @@
 	}
 
 	if (dst_if == NULL) {
-		/*
-		 * Tap off packets passing the bridge. Broadcast packets will
-		 * already be tapped as they are reinjected into ether_input.
-		 */
-		if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
-			BPF_MTAP(ifp, m);
-
 		bridge_broadcast(sc, src_if, m, 1);
 		return;
 	}
@@ -1836,32 +2005,33 @@
 		m_freem(m);
 		return;
 	}
-	bif = bridge_lookup_member_if(sc, dst_if);
-	if (bif == NULL) {
+	dbif = bridge_lookup_member_if(sc, dst_if);
+	if (dbif == NULL) {
 		/* Not a member of the bridge (anymore?) */
 		BRIDGE_UNLOCK(sc);
 		m_freem(m);
 		return;
 	}
 
-	if (bif->bif_flags & IFBIF_STP) {
-		switch (bif->bif_state) {
-		case BSTP_IFSTATE_DISABLED:
-		case BSTP_IFSTATE_BLOCKING:
-			BRIDGE_UNLOCK(sc);
-			m_freem(m);
-			return;
-		}
+	/* Private segments can not talk to each other */
+	if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) {
+		BRIDGE_UNLOCK(sc);
+		m_freem(m);
+		return;
 	}
 
-	/* tap off packets passing the bridge */
-	BPF_MTAP(ifp, m);
+	if ((dbif->bif_flags & IFBIF_STP) &&
+	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+		BRIDGE_UNLOCK(sc);
+		m_freem(m);
+		return;
+	}
 
 	BRIDGE_UNLOCK(sc);
 
-	if (inet_pfil_hook.ph_busy_count >= 0
+	if (PFIL_HOOKED(&inet_pfil_hook)
 #ifdef INET6
-	    || inet6_pfil_hook.ph_busy_count >= 0
+	    || PFIL_HOOKED(&inet6_pfil_hook)
 #endif
 	    ) {
 		if (bridge_pfil(&m, sc->sc_ifp, dst_if, PFIL_OUT) != 0)
@@ -1883,16 +2053,32 @@
 bridge_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct bridge_softc *sc = ifp->if_bridge;
-	struct bridge_iflist *bif;
+	struct bridge_iflist *bif, *bif2;
 	struct ifnet *bifp;
 	struct ether_header *eh;
 	struct mbuf *mc, *mc2;
+	uint16_t vlan;
 
 	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return (m);
 
 	bifp = sc->sc_ifp;
+	vlan = VLANTAGOF(m);
 
+	/*
+	 * Implement support for bridge monitoring. If this flag has been
+	 * set on this interface, discard the packet once we push it through
+	 * the bpf(4) machinery, but before we do, increment the byte and
+	 * packet counters associated with this interface.
+	 */
+	if ((bifp->if_flags & IFF_MONITOR) != 0) {
+		m->m_pkthdr.rcvif  = bifp;
+		ETHER_BPF_MTAP(bifp, m);
+		bifp->if_ipackets++;
+		bifp->if_ibytes += m->m_pkthdr.len;
+		m_freem(m);
+		return (NULL);
+	}
 	BRIDGE_LOCK(sc);
 	bif = bridge_lookup_member_if(sc, ifp);
 	if (bif == NULL) {
@@ -1902,21 +2088,44 @@
 
 	eh = mtod(m, struct ether_header *);
 
-	if (memcmp(eh->ether_dhost, IFP2ENADDR(bifp),
+	if (memcmp(eh->ether_dhost, IF_LLADDR(bifp),
 	    ETHER_ADDR_LEN) == 0) {
+		/* Block redundant paths to us */
+		if ((bif->bif_flags & IFBIF_STP) &&
+		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+			BRIDGE_UNLOCK(sc);
+			return (m);
+		}
+
+		/*
+		 * Filter on the physical interface.
+		 */
+		if (pfil_local_phys && (PFIL_HOOKED(&inet_pfil_hook)
+#ifdef INET6
+		    || PFIL_HOOKED(&inet6_pfil_hook)
+#endif
+		    )) {
+			if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0 ||
+			    m == NULL) {
+				BRIDGE_UNLOCK(sc);
+				return (NULL);
+			}
+		}
+
 		/*
 		 * If the packet is for us, set the packets source as the
 		 * bridge, and return the packet back to ether_input for
 		 * local processing.
 		 */
 
-		/* XXX Do we tap the packet for the member interface too?
-		 * BPF_MTAP(&m->m_pkthdr.rcvif, m);
-		 */
+		/* Note where to send the reply to */
+		if (bif->bif_flags & IFBIF_LEARNING)
+			(void) bridge_rtupdate(sc,
+			    eh->ether_shost, vlan, bif, 0, IFBAF_DYNAMIC);
 
 		/* Mark the packet as arriving on the bridge interface */
 		m->m_pkthdr.rcvif = bifp;
-		BPF_MTAP(bifp, m);
+		ETHER_BPF_MTAP(bifp, m);
 		bifp->if_ipackets++;
 
 		BRIDGE_UNLOCK(sc);
@@ -1925,33 +2134,23 @@
 
 	bridge_span(sc, m);
 
-	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+	if (m->m_flags & (M_BCAST|M_MCAST)) {
 		/* Tap off 802.1D packets; they do not get forwarded. */
 		if (memcmp(eh->ether_dhost, bstp_etheraddr,
 		    ETHER_ADDR_LEN) == 0) {
-			m = bstp_input(ifp, m);
+			m = bstp_input(&bif->bif_stp, ifp, m);
 			if (m == NULL) {
 				BRIDGE_UNLOCK(sc);
 				return (NULL);
 			}
 		}
 
-		if (bif->bif_flags & IFBIF_STP) {
-			switch (bif->bif_state) {
-			case BSTP_IFSTATE_BLOCKING:
-			case BSTP_IFSTATE_LISTENING:
-			case BSTP_IFSTATE_DISABLED:
-				BRIDGE_UNLOCK(sc);
-				return (m);
-			}
+		if ((bif->bif_flags & IFBIF_STP) &&
+		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+			BRIDGE_UNLOCK(sc);
+			return (m);
 		}
 
-		if (bcmp(etherbroadcastaddr, eh->ether_dhost,
-		    sizeof(etherbroadcastaddr)) == 0)
-			m->m_flags |= M_BCAST;
-		else
-			m->m_flags |= M_MCAST;
-
 		/*
 		 * Make a deep copy of the packet and enqueue the copy
 		 * for bridge processing; return the original packet for
@@ -1964,7 +2163,7 @@
 		}
 
 		/* Perform the bridge forwarding function with the copy. */
-		bridge_forward(sc, mc);
+		bridge_forward(sc, bif, mc);
 
 		/*
 		 * Reinject the mbuf as arriving on the bridge so we have a
@@ -1989,44 +2188,70 @@
 		return (m);
 	}
 
-	if (bif->bif_flags & IFBIF_STP) {
-		switch (bif->bif_state) {
-		case BSTP_IFSTATE_BLOCKING:
-		case BSTP_IFSTATE_LISTENING:
-		case BSTP_IFSTATE_DISABLED:
-			BRIDGE_UNLOCK(sc);
-			return (m);
-		}
+	if ((bif->bif_flags & IFBIF_STP) &&
+	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+		BRIDGE_UNLOCK(sc);
+		return (m);
 	}
 
-	/*
-	 * Unicast.  Make sure it's not for us.
-	 */
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if (bif->bif_ifp->if_type == IFT_GIF)
-			continue;
-		/* It is destined for us. */
-		if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
-		    ETHER_ADDR_LEN) == 0) {
-			if (bif->bif_flags & IFBIF_LEARNING)
-				(void) bridge_rtupdate(sc,
-				    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
-			m->m_pkthdr.rcvif = bif->bif_ifp;
-			BRIDGE_UNLOCK(sc);
-			return (m);
-		}
+#ifdef DEV_CARP
+#   define OR_CARP_CHECK_WE_ARE_DST(iface) \
+	|| ((iface)->if_carp \
+	    && carp_forus((iface)->if_carp, eh->ether_dhost))
+#   define OR_CARP_CHECK_WE_ARE_SRC(iface) \
+	|| ((iface)->if_carp \
+	    && carp_forus((iface)->if_carp, eh->ether_shost))
+#else
+#   define OR_CARP_CHECK_WE_ARE_DST(iface)
+#   define OR_CARP_CHECK_WE_ARE_SRC(iface)
+#endif
 
-		/* We just received a packet that we sent out. */
-		if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
-		    ETHER_ADDR_LEN) == 0) {
-			BRIDGE_UNLOCK(sc);
-			m_freem(m);
-			return (NULL);
-		}
+#define GRAB_OUR_PACKETS(iface) \
+	if ((iface)->if_type == IFT_GIF) \
+		continue; \
+	/* It is destined for us. */ \
+	if (memcmp(IF_LLADDR((iface)), eh->ether_dhost,  ETHER_ADDR_LEN) == 0 \
+	    OR_CARP_CHECK_WE_ARE_DST((iface))				\
+	    ) {								\
+		if (bif->bif_flags & IFBIF_LEARNING)			\
+			(void) bridge_rtupdate(sc, eh->ether_shost,	\
+			    vlan, bif, 0, IFBAF_DYNAMIC);		\
+		m->m_pkthdr.rcvif = iface;				\
+		BRIDGE_UNLOCK(sc);					\
+		return (m);						\
+	}								\
+									\
+	/* We just received a packet that we sent out. */		\
+	if (memcmp(IF_LLADDR((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \
+	    OR_CARP_CHECK_WE_ARE_SRC((iface))			\
+	    ) {								\
+		BRIDGE_UNLOCK(sc);					\
+		m_freem(m);						\
+		return (NULL);						\
 	}
 
+	/*
+	 * Unicast.  Make sure it's not for us.
+	 *
+	 * Give a chance for ifp at first priority. This will help when	the
+	 * packet comes through the interface like VLAN's with the same MACs
+	 * on several interfaces from the same bridge. This also will save
+	 * some CPU cycles in case the destination interface and the input
+	 * interface (eq ifp) are the same.
+	 */
+	do { GRAB_OUR_PACKETS(ifp) } while (0);
+
+	/* Now check the all bridge members. */
+	LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
+		GRAB_OUR_PACKETS(bif2->bif_ifp)
+	}
+
+#undef OR_CARP_CHECK_WE_ARE_DST
+#undef OR_CARP_CHECK_WE_ARE_SRC
+#undef GRAB_OUR_PACKETS
+
 	/* Perform the bridge forwarding function. */
-	bridge_forward(sc, m);
+	bridge_forward(sc, bif, m);
 
 	return (NULL);
 }
@@ -2044,12 +2269,13 @@
 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
     struct mbuf *m, int runfilt)
 {
-	struct bridge_iflist *bif;
+	struct bridge_iflist *dbif, *sbif;
 	struct mbuf *mc;
 	struct ifnet *dst_if;
 	int error = 0, used = 0, i;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	sbif = bridge_lookup_member_if(sc, src_if);
+
 	BRIDGE_LOCK2REF(sc, error);
 	if (error) {
 		m_freem(m);
@@ -2057,9 +2283,9 @@
 	}
 
 	/* Filter on the bridge interface before broadcasting */
-	if (runfilt && (inet_pfil_hook.ph_busy_count >= 0
+	if (runfilt && (PFIL_HOOKED(&inet_pfil_hook)
 #ifdef INET6
-	    || inet6_pfil_hook.ph_busy_count >= 0
+	    || PFIL_HOOKED(&inet6_pfil_hook)
 #endif
 	    )) {
 		if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
@@ -2068,27 +2294,27 @@
 			goto out;
 	}
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		dst_if = bif->bif_ifp;
+	LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) {
+		dst_if = dbif->bif_ifp;
 		if (dst_if == src_if)
 			continue;
 
-		if (bif->bif_flags & IFBIF_STP) {
-			switch (bif->bif_state) {
-			case BSTP_IFSTATE_BLOCKING:
-			case BSTP_IFSTATE_DISABLED:
-				continue;
-			}
-		}
+		/* Private segments can not talk to each other */
+		if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
+			continue;
+
+		if ((dbif->bif_flags & IFBIF_STP) &&
+		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+			continue;
 
-		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
+		if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 &&
 		    (m->m_flags & (M_BCAST|M_MCAST)) == 0)
 			continue;
 
 		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			continue;
 
-		if (LIST_NEXT(bif, bif_next) == NULL) {
+		if (LIST_NEXT(dbif, bif_next) == NULL) {
 			mc = m;
 			used = 1;
 		} else {
@@ -2104,9 +2330,9 @@
 		 * pointer so we do not redundantly filter on the bridge for
 		 * each interface we broadcast on.
 		 */
-		if (runfilt && (inet_pfil_hook.ph_busy_count >= 0
+		if (runfilt && (PFIL_HOOKED(&inet_pfil_hook)
 #ifdef INET6
-		    || inet6_pfil_hook.ph_busy_count >= 0
+		    || PFIL_HOOKED(&inet6_pfil_hook)
 #endif
 		    )) {
 			if (used == 0) {
@@ -2151,7 +2377,7 @@
 
 	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
 		dst_if = bif->bif_ifp;
-		
+
 		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			continue;
 
@@ -2171,21 +2397,28 @@
  *	Add a bridge routing entry.
  */
 static int
-bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
-    struct ifnet *dst_if, int setflags, uint8_t flags)
+bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
+    struct bridge_iflist *bif, int setflags, uint8_t flags)
 {
 	struct bridge_rtnode *brt;
+	struct ifnet *dst_if = bif->bif_ifp;
 	int error;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
+	/* 802.1p frames map to vlan 1 */
+	if (vlan == 0)
+		vlan = 1;
+
 	/*
 	 * A route for this destination might already exist.  If so,
 	 * update it, otherwise create a new one.
 	 */
-	if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
-		if (sc->sc_brtcnt >= sc->sc_brtmax)
+	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
+		if (sc->sc_brtcnt >= sc->sc_brtmax) {
+			sc->sc_brtexceeded++;
 			return (ENOSPC);
+		}
 
 		/*
 		 * Allocate a new bridge forwarding node, and
@@ -2196,8 +2429,14 @@
 		if (brt == NULL)
 			return (ENOMEM);
 
-		brt->brt_flags = IFBAF_DYNAMIC;
+		if (bif->bif_flags & IFBIF_STICKY)
+			brt->brt_flags = IFBAF_STICKY;
+		else
+			brt->brt_flags = IFBAF_DYNAMIC;
+
+		brt->brt_ifp = dst_if;
 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
+		brt->brt_vlan = vlan;
 
 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
 			uma_zfree(bridge_rtnode_zone, brt);
@@ -2221,13 +2460,13 @@
  *	Lookup the destination interface for an address.
  */
 static struct ifnet *
-bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
+bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
 {
 	struct bridge_rtnode *brt;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
-	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
+	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL)
 		return (NULL);
 
 	return (brt->brt_ifp);
@@ -2256,8 +2495,7 @@
 	if (sc->sc_brtcnt <= sc->sc_brtmax)
 		return;
 
-	for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
-		nbrt = LIST_NEXT(brt, brt_list);
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
 			bridge_rtnode_destroy(sc, brt);
 			if (sc->sc_brtcnt <= sc->sc_brtmax)
@@ -2297,8 +2535,7 @@
 
 	BRIDGE_LOCK_ASSERT(sc);
 
-	for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
-		nbrt = LIST_NEXT(brt, brt_list);
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
 			if (time_uptime >= brt->brt_expire)
 				bridge_rtnode_destroy(sc, brt);
@@ -2318,8 +2555,7 @@
 
 	BRIDGE_LOCK_ASSERT(sc);
 
-	for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
-		nbrt = LIST_NEXT(brt, brt_list);
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
 			bridge_rtnode_destroy(sc, brt);
 	}
@@ -2331,17 +2567,23 @@
  *	Remove an address from the table.
  */
 static int
-bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
+bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
 {
 	struct bridge_rtnode *brt;
+	int found = 0;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
-	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
-		return (ENOENT);
+	/*
+	 * If vlan is zero then we want to delete entries for all vlans, so the
+	 * lookup may return more than one.
+	 */
+	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
+		bridge_rtnode_destroy(sc, brt);
+		found = 1;
+	}
 
-	bridge_rtnode_destroy(sc, brt);
-	return (0);
+	return (found ? 0 : ENOENT);
 }
 
 /*
@@ -2349,15 +2591,14 @@
  *
 *	Delete routes to a specific member interface.
  */
-void
+static void
 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
 {
 	struct bridge_rtnode *brt, *nbrt;
 
 	BRIDGE_LOCK_ASSERT(sc);
 
-	for (brt = LIST_FIRST(&sc->sc_rtlist); brt != NULL; brt = nbrt) {
-		nbrt = LIST_NEXT(brt, brt_list);
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
 		if (brt->brt_ifp == ifp && (full ||
 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
 			bridge_rtnode_destroy(sc, brt);
@@ -2452,10 +2693,11 @@
 /*
  * bridge_rtnode_lookup:
  *
- *	Look up a bridge route node for the specified destination.
+ *	Look up a bridge route node for the specified destination. Compare the
+ *	vlan id or, if it is zero, just return the first match.
  */
 static struct bridge_rtnode *
-bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
+bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
 {
 	struct bridge_rtnode *brt;
 	uint32_t hash;
@@ -2466,7 +2708,7 @@
 	hash = bridge_rthash(sc, addr);
 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
-		if (dir == 0)
+		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0))
 			return (brt);
 		if (dir > 0)
 			return (NULL);
@@ -2500,7 +2742,7 @@
 
 	do {
 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
-		if (dir == 0)
+		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan)
 			return (EEXIST);
 		if (dir > 0) {
 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
@@ -2542,6 +2784,60 @@
 }
 
 /*
+ * bridge_rtable_expire:
+ *
+ *	Set the expiry time for all routes on an interface.
+ */
+static void
+bridge_rtable_expire(struct ifnet *ifp, int age)
+{
+	struct bridge_softc *sc = ifp->if_bridge;
+	struct bridge_rtnode *brt;
+
+	BRIDGE_LOCK(sc);
+
+	/*
+	 * If the age is zero then flush; otherwise cap the expiry time of all
+	 * dynamic entries on the interface at 'age' seconds from now.
+	 */
+	if (age == 0)
+		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
+	else {
+		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
+			/* Cap the expiry time to 'age' */
+			if (brt->brt_ifp == ifp &&
+			    brt->brt_expire > time_uptime + age &&
+			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+				brt->brt_expire = time_uptime + age;
+		}
+	}
+	BRIDGE_UNLOCK(sc);
+}
+
+/*
+ * bridge_state_change:
+ *
+ *	Callback from the bridgestp code when a port changes states.
+ */
+static void
+bridge_state_change(struct ifnet *ifp, int state)
+{
+	struct bridge_softc *sc = ifp->if_bridge;
+	static const char *stpstates[] = {
+		"disabled",
+		"listening",
+		"learning",
+		"forwarding",
+		"blocking",
+		"discarding"
+	};
+
+	if (log_stp)
+		log(LOG_NOTICE, "%s: state changed to %s on %s\n",
+		    sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname);
+}
+
+/*
  * Send bridge packets through pfil if they are one of the types pfil can deal
  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
@@ -2550,7 +2846,7 @@
 static int
 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
 {
-	int snap, error, i;
+	int snap, error, i, hlen;
 	struct ether_header *eh1, eh2;
 	struct ip_fw_args args;
 	struct ip *ip;
@@ -2560,18 +2856,20 @@
 	snap = 0;
 	error = -1;	/* Default error if not error == 0 */
 
+#if 0
 	/* we may return with the IP fields swapped, ensure its not shared */
 	KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
+#endif
 
 	if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0)
-		return 0; /* filtering is disabled */
+		return (0); /* filtering is disabled */
 
 	i = min((*mp)->m_pkthdr.len, max_protohdr);
 	if ((*mp)->m_len < i) {
 	    *mp = m_pullup(*mp, i);
 	    if (*mp == NULL) {
 		printf("%s: m_pullup failed\n", __func__);
-		return -1;
+		return (-1);
 	    }
 	}
 
@@ -2605,11 +2903,14 @@
 	switch (ether_type) {
 		case ETHERTYPE_ARP:
 		case ETHERTYPE_REVARP:
-			return 0; /* Automatically pass */
+			if (pfil_ipfw_arp == 0)
+				return (0); /* Automatically pass */
+			break;
+
 		case ETHERTYPE_IP:
-# ifdef INET6
+#ifdef INET6
 		case ETHERTYPE_IPV6:
-# endif /* INET6 */
+#endif /* INET6 */
 			break;
 		default:
 			/*
@@ -2639,11 +2940,11 @@
 			case ETHERTYPE_IP:
 				error = bridge_ip_checkbasic(mp);
 				break;
-# ifdef INET6
+#ifdef INET6
 			case ETHERTYPE_IPV6:
 				error = bridge_ip6_checkbasic(mp);
 				break;
-# endif /* INET6 */
+#endif /* INET6 */
 			default:
 				error = 0;
 		}
@@ -2666,14 +2967,14 @@
 		*mp = args.m;
 
 		if (*mp == NULL)
-			return error;
+			return (error);
 
 		if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
 
 			/* put the Ethernet header back on */
 			M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
 			if (*mp == NULL)
-				return error;
+				return (error);
 			bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
 
 			/*
@@ -2682,7 +2983,7 @@
 			 */
 			args.oif = ifp;
 			ip_dn_io_ptr(*mp, DN_TO_IFB_FWD, &args);
-			return error;
+			return (error);
 		}
 
 		if (i != IP_FW_PASS) /* drop */
@@ -2695,9 +2996,8 @@
 	/*
 	 * Run the packet through pfil
 	 */
-	switch (ether_type)
-	{
-	case ETHERTYPE_IP :
+	switch (ether_type) {
+	case ETHERTYPE_IP:
 		/*
 		 * before calling the firewall, swap fields the same as
 		 * IP does. here we assume the header is contiguous
@@ -2732,16 +3032,42 @@
 			error = pfil_run_hooks(&inet_pfil_hook, mp, bifp,
 					dir, NULL);
 
-		/* Restore ip and the fields ntohs()'d. */
-		if (*mp != NULL && error == 0) {
+		if (*mp == NULL || error != 0) /* filter may consume */
+			break;
+
+		/* check if we need to fragment the packet */
+		if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
+			i = (*mp)->m_pkthdr.len;
+			if (i > ifp->if_mtu) {
+				error = bridge_fragment(ifp, *mp, &eh2, snap,
+					    &llc1);
+				return (error);
+			}
+		}
+
+		/* Recalculate the ip checksum and restore byte ordering */
+		ip = mtod(*mp, struct ip *);
+		hlen = ip->ip_hl << 2;
+		if (hlen < sizeof(struct ip))
+			goto bad;
+		if (hlen > (*mp)->m_len) {
+			if ((*mp = m_pullup(*mp, hlen)) == 0)
+				goto bad;
 			ip = mtod(*mp, struct ip *);
-			ip->ip_len = htons(ip->ip_len);
-			ip->ip_off = htons(ip->ip_off);
+			if (ip == NULL)
+				goto bad;
 		}
+		ip->ip_len = htons(ip->ip_len);
+		ip->ip_off = htons(ip->ip_off);
+		ip->ip_sum = 0;
+		if (hlen == sizeof(struct ip))
+			ip->ip_sum = in_cksum_hdr(ip);
+		else
+			ip->ip_sum = in_cksum(*mp, hlen);
 
 		break;
-# ifdef INET6
-	case ETHERTYPE_IPV6 :
+#ifdef INET6
+	case ETHERTYPE_IPV6:
 		if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
 			error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
 					dir, NULL);
@@ -2760,14 +3086,14 @@
 			error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
 					dir, NULL);
 		break;
-# endif
-	default :
+#endif
+	default:
 		error = 0;
 		break;
 	}
 
 	if (*mp == NULL)
-		return error;
+		return (error);
 	if (error != 0)
 		goto bad;
 
@@ -2779,21 +3105,21 @@
 	if (snap) {
 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
 		if (*mp == NULL)
-			return error;
+			return (error);
 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
 	}
 
 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
 	if (*mp == NULL)
-		return error;
+		return (error);
 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
 
-	return 0;
+	return (0);
 
 bad:
 	m_freem(*mp);
 	*mp = NULL;
-	return error;
+	return (error);
 }
 
 /*
@@ -2817,7 +3143,7 @@
 	u_short sum;
 
 	if (*mp == NULL)
-		return -1;
+		return (-1);
 
 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
 		if ((m = m_copyup(m, sizeof(struct ip),
@@ -2890,14 +3216,14 @@
 
 	/* Checks out, proceed */
 	*mp = m;
-	return 0;
+	return (0);
 
 bad:
 	*mp = m;
-	return -1;
+	return (-1);
 }
 
-# ifdef INET6
+#ifdef INET6
 /*
  * Same as above, but for IPv6.
  * Cut-and-pasted from ip6_input.c.
@@ -2943,10 +3269,66 @@
 
 	/* Checks out, proceed */
 	*mp = m;
-	return 0;
+	return (0);
 
 bad:
 	*mp = m;
-	return -1;
+	return (-1);
+}
+#endif /* INET6 */
+
+/*
+ * bridge_fragment:
+ *
+ *	Return a fragmented mbuf chain.
+ */
+static int
+bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
+    int snap, struct llc *llc)
+{
+	struct mbuf *m0;
+	struct ip *ip;
+	int error = -1;
+
+	if (m->m_len < sizeof(struct ip) &&
+	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
+		goto out;
+	ip = mtod(m, struct ip *);
+
+	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
+		    CSUM_DELAY_IP);
+	if (error)
+		goto out;
+
+	/* walk the chain and re-add the Ethernet header */
+	for (m0 = m; m0; m0 = m0->m_nextpkt) {
+		if (error == 0) {
+			if (snap) {
+				M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
+				if (m0 == NULL) {
+					error = ENOBUFS;
+					continue;
+				}
+				bcopy(llc, mtod(m0, caddr_t),
+				    sizeof(struct llc));
+			}
+			M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+			if (m0 == NULL) {
+				error = ENOBUFS;
+				continue;
+			}
+			bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
+		} else 
+			m_freem(m);
+	}
+
+	if (error == 0)
+		ipstat.ips_fragmented++;
+
+	return (error);
+
+out:
+	if (m != NULL)
+		m_freem(m);
+	return (error);
 }
-# endif /* INET6 */
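
For reference, a minimal sketch of how a learning path might call the VLAN-aware
bridge_rtupdate() shown above.  The example_learn() wrapper and its VLAN
extraction are illustrative only and are not part of this commit; the real
callers are bridge_input()/bridge_forward() elsewhere in if_bridge.c.  It
assumes the frame's VLAN id is available from the mbuf's M_VLANTAG metadata
(EVL_VLANOFTAG from net/if_vlan_var.h); vlan 0 is remapped to 1 inside
bridge_rtupdate() itself.

	/*
	 * Illustrative sketch only -- not part of this commit.  Shows the new
	 * bridge_rtupdate() calling convention: the caller passes the VLAN id
	 * and the member interface (struct bridge_iflist *) of the frame.
	 * This would live inside if_bridge.c, since bridge_rtupdate() is static.
	 */
	static void
	example_learn(struct bridge_softc *sc, struct bridge_iflist *bif,
	    const struct ether_header *eh, struct mbuf *m)
	{
		uint16_t vlan = 0;

		/* Hardware-tagged frames carry the VLAN id in the mbuf header. */
		if (m->m_flags & M_VLANTAG)
			vlan = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);

		if (bif->bif_flags & IFBIF_LEARNING)
			(void)bridge_rtupdate(sc, eh->ether_shost, vlan, bif,
			    0, IFBAF_DYNAMIC);
	}
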
Index: ethernet.h
===================================================================
RCS file: /home/cvs/src/sys/net/ethernet.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/ethernet.h -L sys/net/ethernet.h -u -r1.1.1.1 -r1.2
--- sys/net/ethernet.h
+++ sys/net/ethernet.h
@@ -1,7 +1,7 @@
 /*
  * Fundamental constants relating to ethernet.
  *
- * $FreeBSD: src/sys/net/ethernet.h,v 1.24 2004/10/05 19:28:52 sam Exp $
+ * $FreeBSD: src/sys/net/ethernet.h,v 1.32.2.1 2007/10/28 16:24:16 thompsa Exp $
  *
  */
 
@@ -57,18 +57,23 @@
 /*
  * Structure of a 10Mb/s Ethernet header.
  */
-struct	ether_header {
+struct ether_header {
 	u_char	ether_dhost[ETHER_ADDR_LEN];
 	u_char	ether_shost[ETHER_ADDR_LEN];
 	u_short	ether_type;
-};
+} __packed;
 
 /*
  * Structure of a 48-bit Ethernet address.
  */
-struct	ether_addr {
+struct ether_addr {
 	u_char octet[ETHER_ADDR_LEN];
-};
+} __packed;
+
+#ifdef CTASSERT
+CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
+CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
+#endif
 
 #define	ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
 
@@ -311,6 +316,7 @@
 #define	ETHERTYPE_IPAS		0x876C	/* IP Autonomous Systems (RFC1701) */
 #define	ETHERTYPE_SECUREDATA	0x876D	/* Secure Data (RFC1701) */
 #define	ETHERTYPE_FLOWCONTROL	0x8808	/* 802.3x flow control packet */
+#define	ETHERTYPE_SLOW		0x8809	/* 802.3ad link aggregation (LACP) */
 #define	ETHERTYPE_PPP		0x880B	/* PPP (obsolete by PPPOE) */
 #define	ETHERTYPE_HITACHI	0x8820	/* Hitachi Cable (Optoelectronic Systems Laboratory) */
 #define	ETHERTYPE_MPLS		0x8847	/* MPLS Unicast */
@@ -343,6 +349,22 @@
 #define	ETHERMTU	(ETHER_MAX_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
 #define	ETHERMIN	(ETHER_MIN_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
 #define	ETHERMTU_JUMBO	(ETHER_MAX_LEN_JUMBO - ETHER_HDR_LEN - ETHER_CRC_LEN)
+/*
+ * The ETHER_BPF_MTAP macro should be used by drivers which support hardware
+ * offload for VLAN tag processing.  It will check the mbuf to see if it has
+ * M_VLANTAG set, and if it does, will pass the packet along to
+ * ether_vlan_mtap.  This function will re-insert VLAN tags for the duration
+ * of the tap, so they show up properly for network analyzers.
+ */
+#define ETHER_BPF_MTAP(_ifp, _m) do {					\
+	if (bpf_peers_present((_ifp)->if_bpf)) {			\
+		M_ASSERTVALID(_m);					\
+		if (((_m)->m_flags & M_VLANTAG) != 0)			\
+			ether_vlan_mtap((_ifp)->if_bpf, (_m), NULL, 0);	\
+		else							\
+			bpf_mtap((_ifp)->if_bpf, (_m));			\
+	}								\
+} while (0)
 
 #ifdef _KERNEL
 
@@ -350,17 +372,21 @@
 struct mbuf;
 struct rtentry;
 struct sockaddr;
+struct bpf_if;
 
 extern	uint32_t ether_crc32_le(const uint8_t *, size_t);
 extern	uint32_t ether_crc32_be(const uint8_t *, size_t);
 extern	void ether_demux(struct ifnet *, struct mbuf *);
 extern	void ether_ifattach(struct ifnet *, const u_int8_t *);
 extern	void ether_ifdetach(struct ifnet *);
-extern	int  ether_ioctl(struct ifnet *, int, caddr_t);
+extern	int  ether_ioctl(struct ifnet *, u_long, caddr_t);
 extern	int  ether_output(struct ifnet *,
 		   struct mbuf *, struct sockaddr *, struct rtentry *);
 extern	int  ether_output_frame(struct ifnet *, struct mbuf *);
 extern	char *ether_sprintf(const u_int8_t *);
+void	ether_vlan_mtap(struct bpf_if *, struct mbuf *,
+	    void *, u_int);
+struct mbuf  *ether_vlanencap(struct mbuf *, uint16_t);
 
 #else /* _KERNEL */
 
@@ -371,9 +397,11 @@
  */
 __BEGIN_DECLS
 struct	ether_addr *ether_aton(const char *);
+struct	ether_addr *ether_aton_r(const char *, struct ether_addr *);
 int	ether_hostton(const char *, struct ether_addr *);
 int	ether_line(const char *, struct ether_addr *, char *);
 char 	*ether_ntoa(const struct ether_addr *);
+char 	*ether_ntoa_r(const struct ether_addr *, char *);
 int	ether_ntohost(char *, const struct ether_addr *);
 __END_DECLS
 
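
A brief usage sketch of the ETHER_BPF_MTAP() macro added above, as a driver
with hardware VLAN tag insertion might use it in its transmit routine.  The
example_start() function and its dequeue loop are illustrative only and are
not taken from this commit.

	/*
	 * Illustrative sketch only.  Drivers that program the VLAN tag into
	 * the hardware (IFCAP_VLAN_HWTAGGING) should tap with ETHER_BPF_MTAP()
	 * so BPF listeners still see the tag; untagged frames fall through to
	 * plain bpf_mtap() inside the macro.
	 */
	static void
	example_start(struct ifnet *ifp)
	{
		struct mbuf *m;

		for (;;) {
			IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
			if (m == NULL)
				break;

			ETHER_BPF_MTAP(ifp, m);

			/* ... hand the mbuf to the hardware here ... */
			m_freem(m);	/* placeholder for the real transmit */
		}
	}
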
Index: fddi.h
===================================================================
RCS file: /home/cvs/src/sys/net/fddi.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/fddi.h -L sys/net/fddi.h -u -r1.1.1.1 -r1.2
--- sys/net/fddi.h
+++ sys/net/fddi.h
@@ -33,7 +33,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_fddi.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/fddi.h,v 1.14 2005/01/07 01:45:34 imp Exp $
+ * $FreeBSD: src/sys/net/fddi.h,v 1.15 2005/11/11 07:36:14 ru Exp $
  */
 
 #ifndef _NETINET_IF_FDDI_H_
@@ -97,7 +97,7 @@
 #define	FDDI_BPF_UNSUPPORTED	0
 #define	FDDI_BPF_SUPPORTED	1
 
-void	fddi_ifattach(struct ifnet *, int);
+void	fddi_ifattach(struct ifnet *, const u_int8_t *, int);
 void	fddi_ifdetach(struct ifnet *, int);
 int	fddi_ioctl(struct ifnet *, int, caddr_t);
 
Index: ppp_deflate.c
===================================================================
RCS file: /home/cvs/src/sys/net/ppp_deflate.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/ppp_deflate.c -L sys/net/ppp_deflate.c -u -r1.2 -r1.3
--- sys/net/ppp_deflate.c
+++ sys/net/ppp_deflate.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/net/ppp_deflate.c,v 1.21.2.1 2006/03/01 21:40:14 wkoszek Exp $	*/
+/* $FreeBSD: src/sys/net/ppp_deflate.c,v 1.22 2006/02/27 16:56:22 wkoszek Exp $	*/
 
 /*-
  * ppp_deflate.c - interface the zlib procedures for Deflate compression
Index: zlib.c
===================================================================
RCS file: /home/cvs/src/sys/net/zlib.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/zlib.c -L sys/net/zlib.c -u -r1.1.1.1 -r1.2
--- sys/net/zlib.c
+++ sys/net/zlib.c
@@ -10,7 +10,7 @@
  * - added inflateIncomp and deflateOutputPending
  * - allow strm->next_out to be NULL, meaning discard the output
  *
- * $FreeBSD: src/sys/net/zlib.c,v 1.19 2005/01/07 01:45:35 imp Exp $
+ * $FreeBSD: src/sys/net/zlib.c,v 1.20 2005/09/11 16:13:02 rodrigc Exp $
  */
 
 /* 
@@ -94,7 +94,7 @@
 typedef ush FAR ushf;
 typedef unsigned long  ulg;
 
-extern const char *z_errmsg[10]; /* indexed by 2-zlib_error */
+static const char *z_errmsg[10]; /* indexed by 2-zlib_error */
 /* (size given to avoid silly warnings with Visual C++) */
 
 #define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
Index: if_llc.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_llc.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_llc.h -L sys/net/if_llc.h -u -r1.1.1.1 -r1.2
--- sys/net/if_llc.h
+++ sys/net/if_llc.h
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_llc.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_llc.h,v 1.12 2005/06/05 03:13:12 thompsa Exp $
+ * $FreeBSD: src/sys/net/if_llc.h,v 1.13 2006/12/01 17:50:11 imp Exp $
  */
 
 #ifndef _NET_IF_LLC_H_
@@ -52,15 +52,15 @@
 		u_int8_t format_id;
 		u_int8_t class;
 		u_int8_t window_x2;
-	    } type_u __packed;
+	    } __packed type_u;
 	    struct {
 		u_int8_t num_snd_x2;
 		u_int8_t num_rcv_x2;
-	    } type_i __packed;
+	    } __packed type_i;
 	    struct {
 		u_int8_t control;
 		u_int8_t num_rcv_x2;
-	    } type_s __packed;
+	    } __packed type_s;
 	    struct {
 	        u_int8_t control;
 		/*
@@ -72,17 +72,17 @@
 		u_int8_t frmr_control;
 		u_int8_t frmr_control_ext;
 		u_int8_t frmr_cause;
-	    } type_frmr __packed;
+	    } __packed type_frmr;
 	    struct {
 		u_int8_t  control;
 		u_int8_t  org_code[3];
 		u_int16_t ether_type;
-	    } type_snap __packed;
+	    } __packed type_snap;
 	    struct {
 		u_int8_t control;
 		u_int8_t control_ext;
-	    } type_raw __packed;
-	} llc_un /* XXX __packed ??? */;
+	    } __packed type_raw;
+	} __packed llc_un;
 } __packed;
 
 struct frmrinfo {
@@ -114,6 +114,10 @@
 #define LLC_FRMRLEN    7
 #define LLC_SNAPFRAMELEN 8
 
+#ifdef CTASSERT
+CTASSERT(sizeof (struct llc) == LLC_SNAPFRAMELEN);
+#endif
+
 /*
  * Unnumbered LLC format commands
  */
Index: if_sl.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_sl.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_sl.c -L sys/net/if_sl.c -u -r1.1.1.1 -r1.2
--- sys/net/if_sl.c
+++ sys/net/if_sl.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_sl.c	8.6 (Berkeley) 2/1/94
- * $FreeBSD: src/sys/net/if_sl.c,v 1.129 2005/06/10 16:49:18 brooks Exp $
+ * $FreeBSD: src/sys/net/if_sl.c,v 1.133 2006/11/06 13:42:02 rwatson Exp $
  */
 
 /*
@@ -68,6 +68,7 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
@@ -366,7 +367,7 @@
 	register struct sl_softc *sc;
 	int s, error;
 
-	error = suser(curthread);
+	error = priv_check(curthread, PRIV_NET_SLIP);
 	if (error)
 		return (error);
 
@@ -628,7 +629,7 @@
 		 * output queue.  We are being called in lieu of ttstart
 		 * and must do what it would.
 		 */
-		(*tp->t_oproc)(tp);
+		tt_oproc(tp);
 
 		if (tp->t_outq.c_cc != 0) {
 			if (sc != NULL)
@@ -662,7 +663,7 @@
 		 * queueing, and the connection id compression will get
 		 * munged when this happens.
 		 */
-		if (SL2IFP(sc)->if_bpf) {
+		if (bpf_peers_present(SL2IFP(sc)->if_bpf)) {
 			/*
 			 * We need to save the TCP/IP header before it's
 			 * compressed.  To avoid complicated code, we just
@@ -696,7 +697,7 @@
 				*mtod(m, u_char *) |= sl_compress_tcp(m, ip,
 				    &sc->sc_comp, 1);
 		}
-		if (SL2IFP(sc)->if_bpf && sc->bpfbuf) {
+		if (bpf_peers_present(SL2IFP(sc)->if_bpf) && sc->bpfbuf) {
 			/*
 			 * Put the SLIP pseudo-"link header" in place.  The
 			 * compressed header is now at the beginning of the
@@ -876,15 +877,15 @@
 			 * this one is within the time limit.
 			 */
 			if (sc->sc_abortcount &&
-			    time_second >= sc->sc_starttime + ABT_WINDOW)
+			    time_uptime >= sc->sc_starttime + ABT_WINDOW)
 				sc->sc_abortcount = 0;
 			/*
 			 * If we see an abort after "idle" time, count it;
 			 * record when the first abort escape arrived.
 			 */
-			if (time_second >= sc->sc_lasttime + ABT_IDLE) {
+			if (time_uptime >= sc->sc_lasttime + ABT_IDLE) {
 				if (++sc->sc_abortcount == 1)
-					sc->sc_starttime = time_second;
+					sc->sc_starttime = time_uptime;
 				if (sc->sc_abortcount >= ABT_COUNT) {
 					slclose(tp,0);
 					return 0;
@@ -892,7 +893,7 @@
 			}
 		} else
 			sc->sc_abortcount = 0;
-		sc->sc_lasttime = time_second;
+		sc->sc_lasttime = time_uptime;
 	}
 
 	switch (c) {
@@ -922,7 +923,7 @@
 			/* less than min length packet - ignore */
 			goto newpack;
 
-		if (SL2IFP(sc)->if_bpf) {
+		if (bpf_peers_present(SL2IFP(sc)->if_bpf)) {
 			/*
 			 * Save the compressed header, so we
 			 * can tack it on later.  Note that we
@@ -961,7 +962,7 @@
 			} else
 				goto error;
 		}
-		if (SL2IFP(sc)->if_bpf) {
+		if (bpf_peers_present(SL2IFP(sc)->if_bpf)) {
 			/*
 			 * Put the SLIP pseudo-"link header" in place.
 			 * We couldn't do this any earlier since
@@ -1110,7 +1111,7 @@
 			s = splimp ();
 			++SL2IFP(sc)->if_obytes;
 			(void) putc(FRAME_END, &tp->t_outq);
-			(*tp->t_oproc)(tp);
+			tt_oproc(tp);
 			splx (s);
 		} else
 			sc->sc_flags |= SC_OUTWAIT;
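
The raw ifp->if_bpf checks above are replaced by bpf_peers_present(), which
also lets the driver skip the more expensive pseudo-header work when nobody is
listening.  A minimal sketch of the general pattern, assuming a tunnel-style
output path; example_tap_out() and the AF_INET pseudo-header are illustrative,
not taken from this file.

	/* Illustrative sketch only. */
	static void
	example_tap_out(struct ifnet *ifp, struct mbuf *m)
	{
		u_int32_t af = AF_INET;	/* illustrative 4-byte pseudo-header */

		/* Only do the work when a BPF listener is attached. */
		if (bpf_peers_present(ifp->if_bpf))
			bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
	}
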
Index: if_arcsubr.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_arcsubr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_arcsubr.c -L sys/net/if_arcsubr.c -u -r1.1.1.1 -r1.2
--- sys/net/if_arcsubr.c
+++ sys/net/if_arcsubr.c
@@ -1,5 +1,5 @@
 /*	$NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $	*/
-/*	$FreeBSD: src/sys/net/if_arcsubr.c,v 1.25.2.2 2005/08/25 05:01:19 rwatson Exp $ */
+/*	$FreeBSD: src/sys/net/if_arcsubr.c,v 1.30 2006/04/12 07:44:31 rwatson Exp $ */
 
 /*-
  * Copyright (c) 1994, 1995 Ignatios Souvatzis
@@ -100,11 +100,8 @@
  * Assumes that ifp is actually pointer to arccom structure.
  */
 int
-arc_output(ifp, m, dst, rt0)
-	struct ifnet *ifp;
-	struct mbuf *m;
-	struct sockaddr *dst;
-	struct rtentry *rt0;
+arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt0)
 {
 	struct arc_header	*ah;
 	int			error;
@@ -248,8 +245,7 @@
 }
 
 void
-arc_frag_init(ifp)
-	struct ifnet *ifp;
+arc_frag_init(struct ifnet *ifp)
 {
 	struct arccom *ac;
 
@@ -258,8 +254,7 @@
 }
 
 struct mbuf *
-arc_frag_next(ifp)
-	struct ifnet *ifp;
+arc_frag_next(struct ifnet *ifp)
 {
 	struct arccom *ac;
 	struct mbuf *m;
@@ -354,9 +349,7 @@
  */
 
 static __inline struct mbuf *
-arc_defrag(ifp, m)
-	struct ifnet *ifp;
-	struct mbuf *m;
+arc_defrag(struct ifnet *ifp, struct mbuf *m)
 {
 	struct arc_header *ah, *ah1;
 	struct arccom *ac;
@@ -504,8 +497,7 @@
  * Easiest is to assume that everybody else uses that, too.
  */
 int
-arc_isphds(type)
-	u_int8_t type;
+arc_isphds(u_int8_t type)
 {
 	return (type != ARCTYPE_IP_OLD &&
 		type != ARCTYPE_ARP_OLD &&
@@ -518,9 +510,7 @@
  * the ARCnet header.
  */
 void
-arc_input(ifp, m)
-	struct ifnet *ifp;
-	struct mbuf *m;
+arc_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct arc_header *ah;
 	int isr;
@@ -559,14 +549,14 @@
 #ifdef INET
 	case ARCTYPE_IP:
 		m_adj(m, ARC_HDRNEWLEN);
-		if (ip_fastforward(m))
+		if ((m = ip_fastforward(m)) == NULL)
 			return;
 		isr = NETISR_IP;
 		break;
 
 	case ARCTYPE_IP_OLD:
 		m_adj(m, ARC_HDRLEN);
-		if (ip_fastforward(m))
+		if ((m = ip_fastforward(m)) == NULL)
 			return;
 		isr = NETISR_IP;
 		break;
@@ -620,9 +610,7 @@
  * Register (new) link level address.
  */
 void
-arc_storelladdr(ifp, lla)
-	struct ifnet *ifp;
-	u_int8_t lla;
+arc_storelladdr(struct ifnet *ifp, u_int8_t lla)
 {
 	ARC_LLADDR(ifp) = lla;
 }
@@ -631,9 +619,7 @@
  * Perform common duties while attaching to interface list
  */
 void
-arc_ifattach(ifp, lla)
-	struct ifnet *ifp;
-	u_int8_t lla;
+arc_ifattach(struct ifnet *ifp, u_int8_t lla)
 {
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
@@ -649,7 +635,7 @@
 #if __FreeBSD_version < 500000
 	ifa = ifnet_addrs[ifp->if_index - 1];
 #else
-	ifa = ifaddr_byindex(ifp->if_index);
+	ifa = ifp->if_addr;
 #endif
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
@@ -674,18 +660,14 @@
 }
 
 void
-arc_ifdetach(ifp)
-	struct ifnet *ifp;
+arc_ifdetach(struct ifnet *ifp)
 {
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 int
-arc_ioctl(ifp, command, data)
-	struct ifnet *ifp;
-	int command;
-	caddr_t data;
+arc_ioctl(struct ifnet *ifp, int command, caddr_t data)
 {
 	struct ifaddr *ifa = (struct ifaddr *) data;
 	struct ifreq *ifr = (struct ifreq *) data;
@@ -772,10 +754,8 @@
 
 /* based on ether_resolvemulti() */
 int
-arc_resolvemulti(ifp, llsa, sa)
-	struct ifnet *ifp;
-	struct sockaddr **llsa;
-	struct sockaddr *sa;
+arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+    struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 	struct sockaddr_in *sin;
Index: if.h
===================================================================
RCS file: /home/cvs/src/sys/net/if.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if.h -L sys/net/if.h -u -r1.1.1.2 -r1.2
--- sys/net/if.h
+++ sys/net/if.h
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if.h,v 1.96.2.4 2006/02/15 03:37:15 ps Exp $
+ * $FreeBSD: src/sys/net/if.h,v 1.108 2007/06/11 20:08:11 andre Exp $
  */
 
 #ifndef _NET_IF_H_
@@ -83,8 +83,8 @@
 	u_char	ifi_addrlen;		/* media address length */
 	u_char	ifi_hdrlen;		/* media header length */
 	u_char	ifi_link_state;		/* current link state */
-	u_char	ifi_recvquota;		/* polling quota for receive intrs */
-	u_char	ifi_xmitquota;		/* polling quota for xmit intrs */
+	u_char	ifi_spare_char1;	/* spare byte */
+	u_char	ifi_spare_char2;	/* spare byte */
 	u_char	ifi_datalen;		/* length of this data struct */
 	u_long	ifi_mtu;		/* maximum transmission unit */
 	u_long	ifi_metric;		/* routing metric (external only) */
@@ -101,11 +101,8 @@
 	u_long	ifi_omcasts;		/* packets sent via multicast */
 	u_long	ifi_iqdrops;		/* dropped on input, this interface */
 	u_long	ifi_noproto;		/* destined for unsupported protocol */
-	u_long	ifi_hwassist;		/* HW offload capabilities */
+	u_long	ifi_hwassist;		/* HW offload capabilities, see IFCAP */
 	time_t	ifi_epoch;		/* uptime at attach or stat reset */
-#ifdef __alpha__
-	u_int	ifi_timepad;		/* time_t is int, not long on alpha */
-#endif
 	struct	timeval ifi_lastchange;	/* time of last administrative change */
 };
 
@@ -183,7 +180,24 @@
 #define	IF_Mbps(x)	(IF_Kbps((x) * 1000))	/* megabits/sec. */
 #define	IF_Gbps(x)	(IF_Mbps((x) * 1000))	/* gigabits/sec. */
 
-/* Capabilities that interfaces can advertise. */
+/*
+ * Capabilities that interfaces can advertise.
+ *
+ * struct ifnet.if_capabilities
+ *   contains the optional features & capabilities a particular interface
+ *   supports (not only the driver but also the detected hw revision).
+ *   Capabilities are defined by IFCAP_* below.
+ * struct ifnet.if_capenable
+ *   contains the enabled (either by default or through ifconfig) optional
+ *   features & capabilities on this interface.
+ *   Capabilities are defined by IFCAP_* below.
+ * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above
+ *   contains the enabled optional features & capabilities that can be used
+ *   individually per packet and are specified in the mbuf pkthdr.csum_flags
+ *   field.  IFCAP_* and CSUM_* do not match one to one and CSUM_* may be
+ *   more detailed or differentiated than IFCAP_*.
+ *   Hwassist features are defined as CSUM_* in sys/mbuf.h.
+ */
 #define IFCAP_RXCSUM		0x0001  /* can offload checksum on RX */
 #define IFCAP_TXCSUM		0x0002  /* can offload checksum on TX */
 #define IFCAP_NETCONS		0x0004  /* can be a network console */
@@ -191,8 +205,13 @@
 #define	IFCAP_VLAN_HWTAGGING	0x0010	/* hardware VLAN tag support */
 #define	IFCAP_JUMBO_MTU		0x0020	/* 9000 byte MTU supported */
 #define	IFCAP_POLLING		0x0040	/* driver supports polling */
+#define	IFCAP_VLAN_HWCSUM	0x0080	/* can do IFCAP_HWCSUM on VLANs */
+#define	IFCAP_TSO4		0x0100	/* can do TCP Segmentation Offload */
+#define	IFCAP_TSO6		0x0200	/* can do TCP6 Segmentation Offload */
+#define	IFCAP_LRO		0x0400	/* can do Large Receive Offload */
 
 #define IFCAP_HWCSUM		(IFCAP_RXCSUM | IFCAP_TXCSUM)
+#define	IFCAP_TSO		(IFCAP_TSO4 | IFCAP_TSO6)
 
 #define	IFQ_MAXLEN	50
 #define	IFNET_SLOWHZ	1		/* granularity is 1 second */
@@ -358,6 +377,37 @@
 #endif
 
 /*
+ * interface groups
+ */
+
+#define	IFG_ALL		"all"		/* group contains all interfaces */
+/* XXX: will we implement this? */
+#define	IFG_EGRESS	"egress"	/* if(s) default route(s) point to */
+
+struct ifg_req {
+	union {
+		char			 ifgrqu_group[IFNAMSIZ];
+		char			 ifgrqu_member[IFNAMSIZ];
+	} ifgrq_ifgrqu;
+#define	ifgrq_group	ifgrq_ifgrqu.ifgrqu_group
+#define	ifgrq_member	ifgrq_ifgrqu.ifgrqu_member
+};
+
+/*
+ * Used to lookup groups for an interface
+ */
+struct ifgroupreq {
+	char	ifgr_name[IFNAMSIZ];
+	u_int	ifgr_len;
+	union {
+		char	ifgru_group[IFNAMSIZ];
+		struct	ifg_req *ifgru_groups;
+	} ifgr_ifgru;
+#define ifgr_group	ifgr_ifgru.ifgru_group
+#define ifgr_groups	ifgr_ifgru.ifgru_groups
+};
+
+/*
  * Structure for SIOC[AGD]LIFADDR
  */
 struct if_laddrreq {
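
The new struct ifgroupreq is consumed through a two-pass ioctl: a first
SIOCGIFGROUP call with ifgr_len == 0 asks the kernel for the required buffer
size, and a second call copies out the group names (see if_getgroup() in the
if.c diff below).  A minimal userland sketch of that pattern, assuming
SIOCGIFGROUP is exposed via <sys/sockio.h>; the program structure and error
handling are illustrative only.

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <sys/ioctl.h>
	#include <sys/sockio.h>
	#include <net/if.h>

	#include <err.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(int argc, char **argv)
	{
		struct ifgroupreq ifgr;
		u_int i, n;
		int s;

		if (argc != 2)
			errx(1, "usage: %s ifname", argv[0]);

		if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
			err(1, "socket");

		memset(&ifgr, 0, sizeof(ifgr));
		strlcpy(ifgr.ifgr_name, argv[1], sizeof(ifgr.ifgr_name));

		/* First pass: the kernel reports the required length. */
		if (ioctl(s, SIOCGIFGROUP, &ifgr) == -1)
			err(1, "SIOCGIFGROUP (size)");

		n = ifgr.ifgr_len / sizeof(struct ifg_req);
		if ((ifgr.ifgr_groups = calloc(n, sizeof(struct ifg_req))) == NULL)
			err(1, "calloc");

		/* Second pass: copy out the group names. */
		if (ioctl(s, SIOCGIFGROUP, &ifgr) == -1)
			err(1, "SIOCGIFGROUP");

		for (i = 0; i < n; i++)
			printf("%s\n", ifgr.ifgr_groups[i].ifgrq_group);

		free(ifgr.ifgr_groups);
		close(s);
		return (0);
	}
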
Index: if.c
===================================================================
RCS file: /home/cvs/src/sys/net/if.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if.c -L sys/net/if.c -u -r1.1.1.2 -r1.2
--- sys/net/if.c
+++ sys/net/if.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if.c	8.5 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/net/if.c,v 1.234.2.13 2006/02/15 03:37:15 ps Exp $
+ * $FreeBSD: src/sys/net/if.c,v 1.273 2007/07/27 11:59:57 rwatson Exp $
  */
 
 #include "opt_compat.h"
@@ -39,12 +39,12 @@
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/conf.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/sbuf.h>
 #include <sys/bus.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -59,7 +59,6 @@
 #include <machine/stdarg.h>
 
 #include <net/if.h>
-#include <net/if_arp.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
@@ -83,6 +82,8 @@
 #include <netinet/ip_carp.h>
 #endif
 
+#include <security/mac/mac_framework.h>
+
 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
 
@@ -95,12 +96,20 @@
 
 void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
 void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
+void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
 
 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
 
+/*
+ * XXX: Style; these should be sorted alphabetically, and unprototyped
+ * static functions should be prototyped. Currently they are sorted by
+ * declaration order.
+ */
 static void	if_attachdomain(void *);
 static void	if_attachdomain1(struct ifnet *);
+static void	if_purgemaddrs(struct ifnet *);
 static int	ifconf(u_long, caddr_t);
+static void	if_freemulti(struct ifmultiaddr *);
 static void	if_grow(void);
 static void	if_init(void *);
 static void	if_check(void *);
@@ -112,8 +121,11 @@
 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int	if_rtdel(struct radix_node *, void *);
 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
+static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
 static void	if_start_deferred(void *context, int pending);
 static void	do_link_state_change(void *, int);
+static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
+static int	if_getgroupmembers(struct ifgroupreq *);
 #ifdef INET6
 /*
  * XXX: declare here to avoid to include many inet6 related files..
@@ -126,6 +138,7 @@
 struct	ifindex_entry *ifindex_table = NULL;
 int	ifqmaxlen = IFQ_MAXLEN;
 struct	ifnethead ifnet;	/* depend on static init XXX */
+struct	ifgrouphead ifg_head;
 struct	mtx ifnet_lock;
 static	if_com_alloc_t *if_com_alloc[256];
 static	if_com_free_t *if_com_free[256];
@@ -276,6 +289,7 @@
  * Routines with ifa_ifwith* names take sockaddr *'s as
  * parameters.
  */
+
 /* ARGSUSED*/
 static void
 if_init(void *dummy __unused)
@@ -283,6 +297,7 @@
 
 	IFNET_LOCK_INIT();
 	TAILQ_INIT(&ifnet);
+	TAILQ_INIT(&ifg_head);
 	knlist_init(&ifklist, NULL, NULL, NULL, NULL);
 	if_grow();				/* create initial table */
 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
@@ -333,7 +348,9 @@
 }
 
 /*
- * Allocate a struct ifnet and in index for an interface.
+ * Allocate a struct ifnet and an index for an interface.  A layer 2
+ * common structure will also be allocated if an allocation routine is
+ * registered for the passed type.
  */
 struct ifnet*
 if_alloc(u_char type)
@@ -377,14 +394,24 @@
 	return (ifp);
 }
 
+/*
+ * Free the struct ifnet, the associated index, and the layer 2 common
+ * structure if needed.  All the work is done in if_free_type().
+ *
+ * Do not add code to this function!  Add it to if_free_type().
+ */
 void
 if_free(struct ifnet *ifp)
 {
 
-	/* Do not add code to this function!  Add it to if_free_type(). */
 	if_free_type(ifp, ifp->if_type);
 }
 
+/*
+ * Do the actual work of freeing a struct ifnet, associated index, and
+ * layer 2 common structure.  This version should only be called by
+ * interfaces that switch their type after calling if_alloc().
+ */
 void
 if_free_type(struct ifnet *ifp, u_char type)
 {
@@ -410,8 +437,16 @@
 };
 
 /*
- * Attach an interface to the
- * list of "active" interfaces.
+ * Perform generic interface initialization tasks and attach the interface
+ * to the list of "active" interfaces.
+ *
+ * XXX:
+ *  - The decision to return void and thus require this function to
+ *    succeed is questionable.
+ *  - We do more initialization here than is probably a good idea.
+ *    Some of this should probably move to if_alloc().
+ *  - We should probably do more sanity checking.  For instance we don't
+ *    do anything to ensure if_xname is unique or non-empty.
  */
 void
 if_attach(struct ifnet *ifp)
@@ -429,19 +464,14 @@
 	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
 	IF_AFDATA_LOCK_INIT(ifp);
 	ifp->if_afdata_initialized = 0;
-	IFNET_WLOCK();
-	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
-	IFNET_WUNLOCK();
-	/*
-	 * XXX -
-	 * The old code would work if the interface passed a pre-existing
-	 * chain of ifaddrs to this code.  We don't trust our callers to
-	 * properly initialize the tailq, however, so we no longer allow
-	 * this unlikely case.
-	 */
+
 	TAILQ_INIT(&ifp->if_addrhead);
 	TAILQ_INIT(&ifp->if_prefixhead);
 	TAILQ_INIT(&ifp->if_multiaddrs);
+	TAILQ_INIT(&ifp->if_groups);
+
+	if_addgroup(ifp, IFG_ALL);
+
 	knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
 	getmicrotime(&ifp->if_lastchange);
 	ifp->if_data.ifi_epoch = time_uptime;
@@ -484,7 +514,7 @@
 	sdl->sdl_nlen = namelen;
 	sdl->sdl_index = ifp->if_index;
 	sdl->sdl_type = ifp->if_type;
-	ifaddr_byindex(ifp->if_index) = ifa;
+	ifp->if_addr = ifa;
 	ifa->ifa_ifp = ifp;
 	ifa->ifa_rtrequest = link_rtrequest;
 	ifa->ifa_addr = (struct sockaddr *)sdl;
@@ -502,13 +532,21 @@
 	ifp->if_snd.altq_tbr  = NULL;
 	ifp->if_snd.altq_ifp  = ifp;
 
+	IFNET_WLOCK();
+	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
+	IFNET_WUNLOCK();
+
 	if (domain_init_status >= 2)
 		if_attachdomain1(ifp);
 
 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
+	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
 
 	/* Announce the interface. */
 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
+
+	if (ifp->if_watchdog != NULL)
+		if_printf(ifp, "using obsoleted if_watchdog interface\n");
 }
 
 static void
@@ -563,21 +601,19 @@
 }
 
 /*
- * Remove any network addresses from an interface.
+ * Remove any unicast or broadcast network addresses from an interface.
  */
-
 void
 if_purgeaddrs(struct ifnet *ifp)
 {
 	struct ifaddr *ifa, *next;
 
 	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
-
 		if (ifa->ifa_addr->sa_family == AF_LINK)
 			continue;
 #ifdef INET
 		/* XXX: Ugly!! ad hoc just for INET */
-		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
+		if (ifa->ifa_addr->sa_family == AF_INET) {
 			struct ifaliasreq ifr;
 
 			bzero(&ifr, sizeof(ifr));
@@ -590,7 +626,7 @@
 		}
 #endif /* INET */
 #ifdef INET6
-		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
+		if (ifa->ifa_addr->sa_family == AF_INET6) {
 			in6_purgeaddr(ifa);
 			/* ifp_addrhead is already updated */
 			continue;
@@ -602,8 +638,23 @@
 }
 
 /*
+ * Remove any multicast network addresses from an interface.
+ */
+static void
+if_purgemaddrs(struct ifnet *ifp)
+{
+	struct ifmultiaddr *ifma;
+	struct ifmultiaddr *next;
+
+	IF_ADDR_LOCK(ifp);
+	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
+		if_delmulti_locked(ifp, ifma, 1);
+	IF_ADDR_UNLOCK(ifp);
+}
+
+/*
  * Detach an interface, removing it from the
- * list of "active" interfaces and freeing the struct ifnet.
+ * list of "active" interfaces.
  *
  * XXXRW: There are some significant questions about event ordering, and
  * how to prevent things from starting to use the interface during detach.
@@ -617,19 +668,24 @@
 	int i;
 	struct domain *dp;
  	struct ifnet *iter;
- 	int found;
+ 	int found = 0;
+
+	IFNET_WLOCK();
+	TAILQ_FOREACH(iter, &ifnet, if_link)
+		if (iter == ifp) {
+			TAILQ_REMOVE(&ifnet, ifp, if_link);
+			found = 1;
+			break;
+		}
+	IFNET_WUNLOCK();
+	if (!found)
+		return;
 
 	/*
 	 * Remove/wait for pending events.
 	 */
 	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
 
-#ifdef DEV_CARP
-	/* Maybe hook to the generalized departure handler above?!? */
-	if (ifp->if_carp)
-		carp_ifdetach(ifp);
-#endif
-
 	/*
 	 * Remove routes and flush queues.
 	 */
@@ -657,11 +713,13 @@
 	 */
 	in6_ifdetach(ifp);
 #endif
+	if_purgemaddrs(ifp);
+
 	/*
-	 * Remove address from ifindex_table[] and maybe decrement if_index.
+	 * Remove link ifaddr pointer and maybe decrement if_index.
 	 * Clean up all addresses.
 	 */
-	ifaddr_byindex(ifp->if_index) = NULL;
+	ifp->if_addr = NULL;
 	destroy_dev(ifdev_byindex(ifp->if_index));
 	ifdev_byindex(ifp->if_index) = NULL;
 
@@ -689,6 +747,7 @@
 	/* Announce that the interface is gone. */
 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
 
 	IF_AFDATA_LOCK(ifp);
 	for (dp = domains; dp; dp = dp->dom_next) {
@@ -704,22 +763,220 @@
 	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
 	knlist_clear(&ifp->if_klist, 0);
 	knlist_destroy(&ifp->if_klist);
-	IFNET_WLOCK();
- 	found = 0;
- 	TAILQ_FOREACH(iter, &ifnet, if_link)
- 		if (iter == ifp) {
- 			found = 1;
- 			break;
- 		}
- 	if (found)
- 		TAILQ_REMOVE(&ifnet, ifp, if_link);
-	IFNET_WUNLOCK();
 	mtx_destroy(&ifp->if_snd.ifq_mtx);
 	IF_AFDATA_DESTROY(ifp);
 	splx(s);
 }
 
 /*
+ * Add a group to an interface
+ */
+int
+if_addgroup(struct ifnet *ifp, const char *groupname)
+{
+	struct ifg_list		*ifgl;
+	struct ifg_group	*ifg = NULL;
+	struct ifg_member	*ifgm;
+
+	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
+	    groupname[strlen(groupname) - 1] <= '9')
+		return (EINVAL);
+
+	IFNET_WLOCK();
+	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
+		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
+			IFNET_WUNLOCK();
+			return (EEXIST);
+		}
+
+	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
+	    M_NOWAIT)) == NULL) {
+	    	IFNET_WUNLOCK();
+		return (ENOMEM);
+	}
+
+	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
+	    M_TEMP, M_NOWAIT)) == NULL) {
+		free(ifgl, M_TEMP);
+		IFNET_WUNLOCK();
+		return (ENOMEM);
+	}
+
+	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
+		if (!strcmp(ifg->ifg_group, groupname))
+			break;
+
+	if (ifg == NULL) {
+		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
+		    M_TEMP, M_NOWAIT)) == NULL) {
+			free(ifgl, M_TEMP);
+			free(ifgm, M_TEMP);
+			IFNET_WUNLOCK();
+			return (ENOMEM);
+		}
+		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
+		ifg->ifg_refcnt = 0;
+		TAILQ_INIT(&ifg->ifg_members);
+		EVENTHANDLER_INVOKE(group_attach_event, ifg);
+		TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
+	}
+
+	ifg->ifg_refcnt++;
+	ifgl->ifgl_group = ifg;
+	ifgm->ifgm_ifp = ifp;
+
+	IF_ADDR_LOCK(ifp);
+	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
+	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
+	IF_ADDR_UNLOCK(ifp);
+
+	IFNET_WUNLOCK();
+
+	EVENTHANDLER_INVOKE(group_change_event, groupname);
+
+	return (0);
+}
+
+/*
+ * Remove a group from an interface
+ */
+int
+if_delgroup(struct ifnet *ifp, const char *groupname)
+{
+	struct ifg_list		*ifgl;
+	struct ifg_member	*ifgm;
+
+	IFNET_WLOCK();
+	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
+		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
+			break;
+	if (ifgl == NULL) {
+		IFNET_WUNLOCK();
+		return (ENOENT);
+	}
+
+	IF_ADDR_LOCK(ifp);
+	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
+	IF_ADDR_UNLOCK(ifp);
+
+	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
+		if (ifgm->ifgm_ifp == ifp)
+			break;
+
+	if (ifgm != NULL) {
+		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
+		free(ifgm, M_TEMP);
+	}
+
+	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
+		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
+		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
+		free(ifgl->ifgl_group, M_TEMP);
+	}
+	IFNET_WUNLOCK();
+
+	free(ifgl, M_TEMP);
+
+	EVENTHANDLER_INVOKE(group_change_event, groupname);
+
+	return (0);
+}
+
+/*
+ * Stores all groups from an interface in memory pointed
+ * to by data
+ */
+static int
+if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
+{
+	int			 len, error;
+	struct ifg_list		*ifgl;
+	struct ifg_req		 ifgrq, *ifgp;
+	struct ifgroupreq	*ifgr = data;
+
+	if (ifgr->ifgr_len == 0) {
+		IF_ADDR_LOCK(ifp);
+		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
+			ifgr->ifgr_len += sizeof(struct ifg_req);
+		IF_ADDR_UNLOCK(ifp);
+		return (0);
+	}
+
+	len = ifgr->ifgr_len;
+	ifgp = ifgr->ifgr_groups;
+	/* XXX: wire */
+	IF_ADDR_LOCK(ifp);
+	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
+		if (len < sizeof(ifgrq)) {
+			IF_ADDR_UNLOCK(ifp);
+			return (EINVAL);
+		}
+		bzero(&ifgrq, sizeof ifgrq);
+		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
+		    sizeof(ifgrq.ifgrq_group));
+		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
+		    	IF_ADDR_UNLOCK(ifp);
+			return (error);
+		}
+		len -= sizeof(ifgrq);
+		ifgp++;
+	}
+	IF_ADDR_UNLOCK(ifp);
+
+	return (0);
+}
+
+/*
+ * Stores all members of a group in memory pointed to by data
+ */
+static int
+if_getgroupmembers(struct ifgroupreq *data)
+{
+	struct ifgroupreq	*ifgr = data;
+	struct ifg_group	*ifg;
+	struct ifg_member	*ifgm;
+	struct ifg_req		 ifgrq, *ifgp;
+	int			 len, error;
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
+		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
+			break;
+	if (ifg == NULL) {
+		IFNET_RUNLOCK();
+		return (ENOENT);
+	}
+
+	if (ifgr->ifgr_len == 0) {
+		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
+			ifgr->ifgr_len += sizeof(ifgrq);
+		IFNET_RUNLOCK();
+		return (0);
+	}
+
+	len = ifgr->ifgr_len;
+	ifgp = ifgr->ifgr_groups;
+	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
+		if (len < sizeof(ifgrq)) {
+			IFNET_RUNLOCK();
+			return (EINVAL);
+		}
+		bzero(&ifgrq, sizeof ifgrq);
+		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
+		    sizeof(ifgrq.ifgrq_member));
+		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
+			IFNET_RUNLOCK();
+			return (error);
+		}
+		len -= sizeof(ifgrq);
+		ifgp++;
+	}
+	IFNET_RUNLOCK();
+
+	return (0);
+}
+
+/*
  * Delete Routes for a Network Interface
  *
  * Called for each routing entry via the rnh->rnh_walktree() call above
@@ -761,7 +1018,21 @@
 	return (0);
 }
 
-#define	sa_equal(a1, a2)	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
+/*
+ * XXX: Because sockaddr_dl has deeper structure than the sockaddr
+ * structs used to represent other address families, it is necessary
+ * to perform a different comparison.
+ */
+
+#define	sa_equal(a1, a2)	\
+	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
+
+#define	sa_dl_equal(a1, a2)	\
+	((((struct sockaddr_dl *)(a1))->sdl_len ==			\
+	 ((struct sockaddr_dl *)(a2))->sdl_len) &&			\
+	 (bcmp(LLADDR((struct sockaddr_dl *)(a1)),			\
+	       LLADDR((struct sockaddr_dl *)(a2)),			\
+	       ((struct sockaddr_dl *)(a1))->sdl_alen) == 0))
 
 /*
  * Locate an interface based on a complete address.
@@ -794,6 +1065,33 @@
 }
 
 /*
+ * Locate an interface based on the broadcast address.
+ */
+/* ARGSUSED */
+struct ifaddr *
+ifa_ifwithbroadaddr(struct sockaddr *addr)
+{
+	struct ifnet *ifp;
+	struct ifaddr *ifa;
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &ifnet, if_link)
+		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr->sa_family != addr->sa_family)
+				continue;
+			if ((ifp->if_flags & IFF_BROADCAST) &&
+			    ifa->ifa_broadaddr &&
+			    ifa->ifa_broadaddr->sa_len != 0 &&
+			    sa_equal(ifa->ifa_broadaddr, addr))
+				goto done;
+		}
+	ifa = NULL;
+done:
+	IFNET_RUNLOCK();
+	return (ifa);
+}
+
+/*
  * Locate the point to point interface with a given destination address.
  */
 /*ARGSUSED*/
@@ -1045,6 +1343,7 @@
 }
 
 void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
+void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
 
 /*
  * Handle a change in the interface link state. To avoid LORs
@@ -1080,7 +1379,7 @@
 	else
 		link = NOTE_LINKINV;
 	KNOTE_UNLOCKED(&ifp->if_klist, link);
-	if (ifp->if_nvlans != 0)
+	if (ifp->if_vlantrunk != NULL)
 		(*vlan_link_state_p)(ifp, link);
 
 	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
@@ -1094,6 +1393,10 @@
 		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
 		(*bstp_linkstate_p)(ifp, link_state);
 	}
+	if (ifp->if_lagg) {
+		KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
+		(*lagg_linkstate_p)(ifp, link_state);
+	}
 
 	devctl_notify("IFNET", ifp->if_xname,
 	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
@@ -1248,7 +1551,7 @@
 		break;
 
 	case SIOCSIFFLAGS:
-		error = suser(td);
+		error = priv_check(td, PRIV_NET_SETIFFLAGS);
 		if (error)
 			return (error);
 		/*
@@ -1270,15 +1573,19 @@
 			if_up(ifp);
 			splx(s);
 		}
+		/* See if permanently promiscuous mode bit is about to flip */
+		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
+			if (new_flags & IFF_PPROMISC)
+				ifp->if_flags |= IFF_PROMISC;
+			else if (ifp->if_pcount == 0)
+				ifp->if_flags &= ~IFF_PROMISC;
+			log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
+			    ifp->if_xname,
+			    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
+		}
 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
 			(new_flags &~ IFF_CANTCHANGE);
-		if (new_flags & IFF_PPROMISC) {
-			/* Permanently promiscuous mode requested */
-			ifp->if_flags |= IFF_PROMISC;
-		} else if (ifp->if_pcount == 0) {
-			ifp->if_flags &= ~IFF_PROMISC;
-		}
-		if (ifp->if_ioctl != NULL) {
+		if (ifp->if_ioctl) {
 			IFF_LOCKGIANT(ifp);
 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
 			IFF_UNLOCKGIANT(ifp);
@@ -1287,7 +1594,7 @@
 		break;
 
 	case SIOCSIFCAP:
-		error = suser(td);
+		error = priv_check(td, PRIV_NET_SETIFCAP);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
@@ -1308,8 +1615,8 @@
 #endif
 
 	case SIOCSIFNAME:
-		error = suser(td);
-		if (error != 0)
+		error = priv_check(td, PRIV_NET_SETIFNAME);
+		if (error)
 			return (error);
 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
 		if (error != 0)
@@ -1327,7 +1634,7 @@
 		    ifp->if_xname, new_name);
 
 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
-		ifa = ifaddr_byindex(ifp->if_index);
+		ifa = ifp->if_addr;
 		IFA_LOCK(ifa);
 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 		namelen = strlen(new_name);
@@ -1355,7 +1662,7 @@
 		break;
 
 	case SIOCSIFMETRIC:
-		error = suser(td);
+		error = priv_check(td, PRIV_NET_SETIFMETRIC);
 		if (error)
 			return (error);
 		ifp->if_metric = ifr->ifr_metric;
@@ -1363,7 +1670,7 @@
 		break;
 
 	case SIOCSIFPHYS:
-		error = suser(td);
+		error = priv_check(td, PRIV_NET_SETIFPHYS);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
@@ -1379,7 +1686,7 @@
 	{
 		u_long oldmtu = ifp->if_mtu;
 
-		error = suser(td);
+		error = priv_check(td, PRIV_NET_SETIFMTU);
 		if (error)
 			return (error);
 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
@@ -1406,7 +1713,10 @@
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
-		error = suser(td);
+		if (cmd == SIOCADDMULTI)
+			error = priv_check(td, PRIV_NET_ADDMULTI);
+		else
+			error = priv_check(td, PRIV_NET_DELMULTI);
 		if (error)
 			return (error);
 
@@ -1420,7 +1730,21 @@
 
 		if (cmd == SIOCADDMULTI) {
 			struct ifmultiaddr *ifma;
-			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
+
+			/*
+			 * Userland is only permitted to join groups once
+			 * via the if_addmulti() KPI, because it cannot hold
+			 * struct ifmultiaddr * between calls. It may also
+			 * lose a race while we check if the membership
+			 * already exists.
+			 */
+			IF_ADDR_LOCK(ifp);
+			ifma = if_findmulti(ifp, &ifr->ifr_addr);
+			IF_ADDR_UNLOCK(ifp);
+			if (ifma != NULL)
+				error = EADDRINUSE;
+			else
+				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
 		} else {
 			error = if_delmulti(ifp, &ifr->ifr_addr);
 		}
@@ -1436,7 +1760,7 @@
 	case SIOCSLIFPHYADDR:
 	case SIOCSIFMEDIA:
 	case SIOCSIFGENERIC:
-		error = suser(td);
+		error = priv_check(td, PRIV_NET_HWIOCTL);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
@@ -1465,13 +1789,42 @@
 		break;
 
 	case SIOCSIFLLADDR:
-		error = suser(td);
+		error = priv_check(td, PRIV_NET_SETLLADDR);
 		if (error)
 			return (error);
 		error = if_setlladdr(ifp,
 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
 		break;
 
+	case SIOCAIFGROUP:
+	{
+		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
+
+		error = priv_check(td, PRIV_NET_ADDIFGROUP);
+		if (error)
+			return (error);
+		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
+			return (error);
+		break;
+	}
+
+	case SIOCGIFGROUP:
+		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
+			return (error);
+		break;
+
+	case SIOCDIFGROUP:
+	{
+		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
+
+		error = priv_check(td, PRIV_NET_DELIFGROUP);
+		if (error)
+			return (error);
+		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
+			return (error);
+		break;
+	}
+
 	default:
 		error = ENOIOCTL;
 		break;
@@ -1502,15 +1855,22 @@
 
 	switch (cmd) {
 	case SIOCIFCREATE:
+	case SIOCIFCREATE2:
+		error = priv_check(td, PRIV_NET_IFCREATE);
+		if (error)
+			return (error);
+		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
+			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
 	case SIOCIFDESTROY:
-		if ((error = suser(td)) != 0)
+		error = priv_check(td, PRIV_NET_IFDESTROY);
+		if (error)
 			return (error);
-		return ((cmd == SIOCIFCREATE) ?
-			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
-			if_clone_destroy(ifr->ifr_name));
+		return if_clone_destroy(ifr->ifr_name);
 
 	case SIOCIFGCLONERS:
 		return (if_clone_list((struct if_clonereq *)data));
+	case SIOCGIFGMEMB:
+		return (if_getgroupmembers((struct ifgroupreq *)data));
 	}
 
 	ifp = ifunit(ifr->ifr_name);
@@ -1597,7 +1957,7 @@
 /*
  * The code common to handling reference counted flags,
  * e.g., in ifpromisc() and if_allmulti().
- * The "pflag" argument can specify a permanent mode flag,
+ * The "pflag" argument can specify a permanent mode flag to check,
  * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
  *
  * Only to be used on stack-owned flags, not driver-owned flags.
@@ -1609,25 +1969,18 @@
 	int error;
 	int oldflags, oldcount;
 
+	/* Sanity checks to catch programming errors */
 	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
-	    ("if_setflag: setting driver-ownded flag %d", flag));
+	    ("%s: setting driver-owned flag %d", __func__, flag));
 
-	/* Sanity checks to catch programming errors */
-	if (onswitch) {
-		if (*refcount < 0) {
-			if_printf(ifp,
-			    "refusing to increment negative refcount %d "
-			    "for interface flag %d\n", *refcount, flag);
-			return (EINVAL);
-		}
-	} else {
-		if (*refcount <= 0) {
-			if_printf(ifp,
-			    "refusing to decrement non-positive refcount %d"
-			    "for interface flag %d\n", *refcount, flag);
-			return (EINVAL);
-		}
-	}
+	if (onswitch)
+		KASSERT(*refcount >= 0,
+		    ("%s: increment negative refcount %d for flag %d",
+		    __func__, *refcount, flag));
+	else
+		KASSERT(*refcount > 0,
+		    ("%s: decrement non-positive refcount %d for flag %d",
+		    __func__, *refcount, flag));
 
 	/* In case this mode is permanent, just touch refcount */
 	if (ifp->if_flags & pflag) {
@@ -1834,7 +2187,7 @@
 	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
 }
 
-static struct ifmultiaddr *
+struct ifmultiaddr *
 if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
 {
 	struct ifmultiaddr *ifma;
@@ -1842,8 +2195,13 @@
 	IF_ADDR_LOCK_ASSERT(ifp);
 
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
-		if (sa_equal(ifma->ifma_addr, sa))
-			break;
+		if (sa->sa_family == AF_LINK) {
+			if (sa_dl_equal(ifma->ifma_addr, sa))
+				break;
+		} else {
+			if (sa_equal(ifma->ifma_addr, sa))
+				break;
+		}
 	}
 
 	return ifma;
@@ -1907,7 +2265,7 @@
 if_freemulti(struct ifmultiaddr *ifma)
 {
 
-	KASSERT(ifma->ifma_refcount == 1, ("if_freemulti: refcount %d",
+	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
 	    ifma->ifma_refcount));
 	KASSERT(ifma->ifma_protospec == NULL,
 	    ("if_freemulti: protospec not NULL"));
@@ -1993,6 +2351,7 @@
 		if (ll_ifma == NULL) {
 			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
 			if (ll_ifma == NULL) {
+				--ifma->ifma_refcount;
 				if_freemulti(ifma);
 				error = ENOMEM;
 				goto free_llsa_out;
@@ -2001,6 +2360,7 @@
 			    ifma_link);
 		} else
 			ll_ifma->ifma_refcount++;
+		ifma->ifma_llifma = ll_ifma;
 	}
 
 	/*
@@ -2016,8 +2376,6 @@
 	/*
 	 * Must generate the message while holding the lock so that 'ifma'
 	 * pointer is still valid.
-	 *
-	 * XXXRW: How come we don't announce ll_ifma?
 	 */
 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
 	IF_ADDR_UNLOCK(ifp);
@@ -2047,61 +2405,182 @@
 }
 
 /*
- * Remove a reference to a multicast address on this interface.  Yell
- * if the request does not match an existing membership.
+ * Delete a multicast group membership by network-layer group address.
+ *
+ * Returns ENOENT if the entry could not be found. If ifp no longer
+ * exists, results are undefined. This entry point should only be used
+ * from subsystems which do appropriate locking to hold ifp for the
+ * duration of the call.
+ * Network-layer protocol domains must use if_delmulti_ifma().
  */
 int
 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
 {
-	struct ifmultiaddr *ifma, *ll_ifma;
+	struct ifmultiaddr *ifma;
+	int lastref;
+#ifdef INVARIANTS
+	struct ifnet *oifp;
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(oifp, &ifnet, if_link)
+		if (ifp == oifp)
+			break;
+	if (ifp != oifp)
+		ifp = NULL;
+	IFNET_RUNLOCK();
+
+	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
+#endif
+	if (ifp == NULL)
+		return (ENOENT);
 
 	IF_ADDR_LOCK(ifp);
+	lastref = 0;
 	ifma = if_findmulti(ifp, sa);
-	if (ifma == NULL) {
-		IF_ADDR_UNLOCK(ifp);
-		return ENOENT;
+	if (ifma != NULL)
+		lastref = if_delmulti_locked(ifp, ifma, 0);
+	IF_ADDR_UNLOCK(ifp);
+
+	if (ifma == NULL)
+		return (ENOENT);
+
+	if (lastref && ifp->if_ioctl != NULL) {
+		IFF_LOCKGIANT(ifp);
+		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
+		IFF_UNLOCKGIANT(ifp);
 	}
 
-	if (ifma->ifma_refcount > 1) {
-		ifma->ifma_refcount--;
+	return (0);
+}
+
+/*
+ * Delete a multicast group membership by group membership pointer.
+ * Network-layer protocol domains must use this routine.
+ *
+ * It is safe to call this routine if the ifp disappeared. Callers should
+ * hold IFF_LOCKGIANT() to avoid a LOR in case the hardware needs to be
+ * reconfigured.
+ */
+void
+if_delmulti_ifma(struct ifmultiaddr *ifma)
+{
+	struct ifnet *ifp;
+	int lastref;
+
+	ifp = ifma->ifma_ifp;
+#ifdef DIAGNOSTIC
+	if (ifp == NULL) {
+		printf("%s: ifma_ifp seems to be detached\n", __func__);
+	} else {
+		struct ifnet *oifp;
+
+		IFNET_RLOCK();
+		TAILQ_FOREACH(oifp, &ifnet, if_link)
+			if (ifp == oifp)
+				break;
+		if (ifp != oifp) {
+			printf("%s: ifnet %p disappeared\n", __func__, ifp);
+			ifp = NULL;
+		}
+		IFNET_RUNLOCK();
+	}
+#endif
+	/*
+	 * If and only if the ifnet instance exists: Acquire the address lock.
+	 */
+	if (ifp != NULL)
+		IF_ADDR_LOCK(ifp);
+
+	lastref = if_delmulti_locked(ifp, ifma, 0);
+
+	if (ifp != NULL) {
+		/*
+		 * If and only if the ifnet instance exists:
+		 *  Release the address lock.
+		 *  If the group was left: update the hardware hash filter.
+		 */
 		IF_ADDR_UNLOCK(ifp);
-		return 0;
+		if (lastref && ifp->if_ioctl != NULL) {
+			IFF_LOCKGIANT(ifp);
+			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
+			IFF_UNLOCKGIANT(ifp);
+		}
 	}
+}
 
-	sa = ifma->ifma_lladdr;
-	if (sa != NULL)
-		ll_ifma = if_findmulti(ifp, sa);
-	else
-		ll_ifma = NULL;
+/*
+ * Perform deletion of network-layer and/or link-layer multicast address.
+ *
+ * Return 0 if the reference count was decremented.
+ * Return 1 if the final reference was released, indicating that the
+ * hardware hash filter should be reprogrammed.
+ */
+static int
+if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
+{
+	struct ifmultiaddr *ll_ifma;
+
+	if (ifp != NULL && ifma->ifma_ifp != NULL) {
+		KASSERT(ifma->ifma_ifp == ifp,
+		    ("%s: inconsistent ifp %p", __func__, ifp));
+		IF_ADDR_LOCK_ASSERT(ifp);
+	}
+
+	ifp = ifma->ifma_ifp;
 
 	/*
-	 * XXXRW: How come we don't announce ll_ifma?
+	 * If the ifnet is detaching, null out references to ifnet,
+	 * so that upper protocol layers will notice, and not attempt
+	 * to obtain locks for an ifnet which no longer exists. The
+	 * routing socket announcement must happen before the ifnet
+	 * instance is detached from the system.
 	 */
-	rt_newmaddrmsg(RTM_DELMADDR, ifma);
+	if (detaching) {
+#ifdef DIAGNOSTIC
+		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
+#endif
+		/*
+		 * ifp may already be nulled out if we are being reentered
+		 * to delete the ll_ifma.
+		 */
+		if (ifp != NULL) {
+			rt_newmaddrmsg(RTM_DELMADDR, ifma);
+			ifma->ifma_ifp = NULL;
+		}
+	}
 
-	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
-	if_freemulti(ifma);
+	if (--ifma->ifma_refcount > 0)
+		return 0;
 
+	/*
+	 * If this ifma is a network-layer ifma, a link-layer ifma may
+	 * have been associated with it. Release it first if so.
+	 */
+	ll_ifma = ifma->ifma_llifma;
 	if (ll_ifma != NULL) {
-		if (ll_ifma->ifma_refcount == 1) {
-			TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link);
+		KASSERT(ifma->ifma_lladdr != NULL,
+		    ("%s: llifma w/o lladdr", __func__));
+		if (detaching)
+			ll_ifma->ifma_ifp = NULL;	/* XXX */
+		if (--ll_ifma->ifma_refcount == 0) {
+			if (ifp != NULL) {
+				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
+				    ifma_link);
+			}
 			if_freemulti(ll_ifma);
-		} else
-			ll_ifma->ifma_refcount--;
+		}
 	}
-	IF_ADDR_UNLOCK(ifp);
+
+	if (ifp != NULL)
+		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
+
+	if_freemulti(ifma);
 
 	/*
-	 * Make sure the interface driver is notified
-	 * in the case of a link layer mcast group being left.
+	 * The last reference to this instance of struct ifmultiaddr
+	 * was released; the hardware should be notified of this change.
 	 */
-	if (ifp->if_ioctl) {
-		IFF_LOCKGIANT(ifp);
-		(void) (*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
-		IFF_UNLOCKGIANT(ifp);
-	}
-
-	return 0;
+	return 1;
 }
 
 /*
@@ -2117,7 +2596,7 @@
 	struct ifaddr *ifa;
 	struct ifreq ifr;
 
-	ifa = ifaddr_byindex(ifp->if_index);
+	ifa = ifp->if_addr;
 	if (ifa == NULL)
 		return (EINVAL);
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
@@ -2126,20 +2605,14 @@
 	if (len != sdl->sdl_alen)	/* don't allow length to change */
 		return (EINVAL);
 	switch (ifp->if_type) {
-	case IFT_ETHER:			/* these types use struct arpcom */
+	case IFT_ETHER:
 	case IFT_FDDI:
 	case IFT_XETHER:
 	case IFT_ISO88025:
 	case IFT_L2VLAN:
 	case IFT_BRIDGE:
-		bcopy(lladdr, IFP2ENADDR(ifp), len);
-		/*
-		 * XXX We also need to store the lladdr in LLADDR(sdl),
-		 * which is done below. This is a pain because we must
-		 * remember to keep the info in sync.
-		 */
-		/* FALLTHROUGH */
 	case IFT_ARCNET:
+	case IFT_IEEE8023ADLAG:
 		bcopy(lladdr, LLADDR(sdl), len);
 		break;
 	default:
@@ -2169,8 +2642,7 @@
 		 * the address change.
 		 */
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-			if (ifa->ifa_addr != NULL &&
-			    ifa->ifa_addr->sa_family == AF_INET)
+			if (ifa->ifa_addr->sa_family == AF_INET)
 				arp_ifinit(ifp, ifa);
 		}
 #endif
@@ -2222,9 +2694,7 @@
 if_start(struct ifnet *ifp)
 {
 
-	NET_ASSERT_GIANT();
-
-	if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) {
+	if (ifp->if_flags & IFF_NEEDSGIANT) {
 		if (mtx_owned(&Giant))
 			(*(ifp)->if_start)(ifp);
 		else
@@ -2239,11 +2709,6 @@
 {
 	struct ifnet *ifp;
 
-	/*
-	 * This code must be entered with Giant, and should never run if
-	 * we're not running with debug.mpsafenet.
-	 */
-	KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet"));
 	GIANT_REQUIRED;
 
 	ifp = context;
@@ -2293,9 +2758,9 @@
 if_deregister_com_alloc(u_char type)
 {
 	
-	KASSERT(if_com_alloc[type] == NULL,
+	KASSERT(if_com_alloc[type] != NULL,
 	    ("if_deregister_com_alloc: %d not registered", type));
-	KASSERT(if_com_free[type] == NULL,
+	KASSERT(if_com_free[type] != NULL,
 	    ("if_deregister_com_alloc: %d free not registered", type));
 	if_com_alloc[type] = NULL;
 	if_com_free[type] = NULL;
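
The if.c hunks above rework multicast membership management around reference-counted struct ifmultiaddr pointers: if_addmulti() hands back a referenced ifma, network-layer consumers keep that pointer, and if_delmulti_ifma() releases it, with the final release triggering SIOCDELMULTI so the driver can reprogram its hash filter. A minimal sketch of the new calling pattern (mirroring what if_lagg.c does below); 'ifp' and the link-level sockaddr 'sdl' are assumed to exist and error handling is trimmed:

	struct ifmultiaddr *ifma;
	int error;

	/* join: if_addmulti() returns a referenced ifma the caller keeps */
	error = if_addmulti(ifp, (struct sockaddr *)&sdl, &ifma);
	if (error == 0) {
		/* ... membership in use ... */

		/* leave: drops the reference; on the final release the
		 * interface is told to reprogram its filter (SIOCDELMULTI) */
		if_delmulti_ifma(ifma);
	}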
--- /dev/null
+++ sys/net/ieee8023ad_lacp.h
@@ -0,0 +1,289 @@
+/*	$NetBSD: ieee8023ad_impl.h,v 1.2 2005/12/10 23:21:39 elad Exp $	*/
+
+/*-
+ * Copyright (c)2005 YAMAMOTO Takashi,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/net/ieee8023ad_lacp.h,v 1.8 2007/07/05 09:18:57 thompsa Exp $
+ */
+
+/*
+ * IEEE802.3ad LACP
+ *
+ * implementation details.
+ */
+
+#define	LACP_TIMER_CURRENT_WHILE	0
+#define	LACP_TIMER_PERIODIC		1
+#define	LACP_TIMER_WAIT_WHILE		2
+#define	LACP_NTIMER			3
+
+#define	LACP_TIMER_ARM(port, timer, val) \
+	(port)->lp_timer[(timer)] = (val)
+#define	LACP_TIMER_DISARM(port, timer) \
+	(port)->lp_timer[(timer)] = 0
+#define	LACP_TIMER_ISARMED(port, timer) \
+	((port)->lp_timer[(timer)] > 0)
+
+/*
+ * IEEE802.3ad LACP
+ *
+ * protocol definitions.
+ */
+
+#define	LACP_STATE_ACTIVITY	(1<<0)
+#define	LACP_STATE_TIMEOUT	(1<<1)
+#define	LACP_STATE_AGGREGATION	(1<<2)
+#define	LACP_STATE_SYNC		(1<<3)
+#define	LACP_STATE_COLLECTING	(1<<4)
+#define	LACP_STATE_DISTRIBUTING	(1<<5)
+#define	LACP_STATE_DEFAULTED	(1<<6)
+#define	LACP_STATE_EXPIRED	(1<<7)
+
+#define	LACP_PORT_NTT		0x00000001
+#define	LACP_PORT_MARK		0x00000002
+
+#define	LACP_STATE_BITS		\
+	"\020"			\
+	"\001ACTIVITY"		\
+	"\002TIMEOUT"		\
+	"\003AGGREGATION"	\
+	"\004SYNC"		\
+	"\005COLLECTING"	\
+	"\006DISTRIBUTING"	\
+	"\007DEFAULTED"		\
+	"\010EXPIRED"
+
+/*
+ * IEEE802.3 slow protocols
+ *
+ * protocol (on-wire) definitions.
+ *
+ * XXX should be elsewhere.
+ */
+
+#define	SLOWPROTOCOLS_SUBTYPE_LACP	1
+#define	SLOWPROTOCOLS_SUBTYPE_MARKER	2
+
+struct slowprothdr {
+	uint8_t		sph_subtype;
+	uint8_t		sph_version;
+} __packed;
+
+/*
+ * TLV on-wire structure.
+ */
+
+struct tlvhdr {
+	uint8_t		tlv_type;
+	uint8_t		tlv_length;
+	/* uint8_t tlv_value[]; */
+} __packed;
+
+/*
+ * ... and our implementation.
+ */
+
+#define	TLV_SET(tlv, type, length) \
+	do { \
+		(tlv)->tlv_type = (type); \
+		(tlv)->tlv_length = sizeof(*tlv) + (length); \
+	} while (/*CONSTCOND*/0)
+
+struct tlv_template {
+	uint8_t			tmpl_type;
+	uint8_t			tmpl_length;
+};
+
+struct lacp_systemid {
+	uint16_t		lsi_prio;
+	uint8_t			lsi_mac[6];
+} __packed;
+
+struct lacp_portid {
+	uint16_t		lpi_prio;
+	uint16_t		lpi_portno;
+} __packed;
+
+struct lacp_peerinfo {
+	struct lacp_systemid	lip_systemid;
+	uint16_t		lip_key;
+	struct lacp_portid	lip_portid;
+	uint8_t			lip_state;
+	uint8_t			lip_resv[3];
+} __packed;
+
+struct lacp_collectorinfo {
+	uint16_t		lci_maxdelay;
+	uint8_t			lci_resv[12];
+} __packed;
+
+struct lacpdu {
+	struct ether_header	ldu_eh;
+	struct slowprothdr	ldu_sph;
+
+	struct tlvhdr		ldu_tlv_actor;
+	struct lacp_peerinfo	ldu_actor;
+	struct tlvhdr		ldu_tlv_partner;
+	struct lacp_peerinfo	ldu_partner;
+	struct tlvhdr		ldu_tlv_collector;
+	struct lacp_collectorinfo ldu_collector;
+	struct tlvhdr		ldu_tlv_term;
+	uint8_t			ldu_resv[50];
+} __packed;
+
+/*
+ * IEEE802.3ad marker protocol
+ *
+ * protocol (on-wire) definitions.
+ */
+struct lacp_markerinfo {
+	uint16_t		mi_rq_port;
+	uint8_t			mi_rq_system[ETHER_ADDR_LEN];
+	uint32_t		mi_rq_xid;
+	uint8_t			mi_pad[2];
+} __packed;
+
+struct markerdu {
+	struct ether_header	mdu_eh;
+	struct slowprothdr	mdu_sph;
+
+	struct tlvhdr		mdu_tlv;
+	struct lacp_markerinfo	mdu_info;
+	struct tlvhdr		mdu_tlv_term;
+	uint8_t			mdu_resv[90];
+} __packed;
+
+#define	MARKER_TYPE_INFO	0x01
+#define	MARKER_TYPE_RESPONSE	0x02
+
+enum lacp_selected {
+	LACP_UNSELECTED,
+	LACP_STANDBY,	/* not used in this implementation */
+	LACP_SELECTED,
+};
+
+enum lacp_mux_state {
+	LACP_MUX_DETACHED,
+	LACP_MUX_WAITING,
+	LACP_MUX_ATTACHED,
+	LACP_MUX_COLLECTING,
+	LACP_MUX_DISTRIBUTING,
+};
+
+struct lacp_port {
+	TAILQ_ENTRY(lacp_port)	lp_dist_q;
+	LIST_ENTRY(lacp_port)	lp_next;
+	struct lacp_softc	*lp_lsc;
+	struct lagg_port	*lp_lagg;
+	struct ifnet		*lp_ifp;
+	struct lacp_peerinfo	lp_partner;
+	struct lacp_peerinfo	lp_actor;
+	struct lacp_markerinfo	lp_marker;
+#define	lp_state	lp_actor.lip_state
+#define	lp_key		lp_actor.lip_key
+#define	lp_systemid	lp_actor.lip_systemid
+	struct timeval		lp_last_lacpdu;
+	int			lp_lacpdu_sent;
+	enum lacp_mux_state	lp_mux_state;
+	enum lacp_selected	lp_selected;
+	int			lp_flags;
+	u_int			lp_media; /* XXX redundant */
+	int			lp_timer[LACP_NTIMER];
+	struct ifmultiaddr	*lp_ifma;
+
+	struct lacp_aggregator	*lp_aggregator;
+};
+
+struct lacp_aggregator {
+	TAILQ_ENTRY(lacp_aggregator)	la_q;
+	int			la_refcnt; /* num of ports which selected us */
+	int			la_nports; /* num of distributing ports  */
+	TAILQ_HEAD(, lacp_port)	la_ports; /* distributing ports */
+	struct lacp_peerinfo	la_partner;
+	struct lacp_peerinfo	la_actor;
+	int			la_pending; /* number of ports in wait_while */
+};
+
+struct lacp_softc {
+	struct lagg_softc	*lsc_softc;
+	struct lacp_aggregator	*lsc_active_aggregator;
+	TAILQ_HEAD(, lacp_aggregator) lsc_aggregators;
+	boolean_t		lsc_suppress_distributing;
+	struct callout		lsc_transit_callout;
+	struct callout		lsc_callout;
+	LIST_HEAD(, lacp_port)	lsc_ports;
+	u_int32_t		lsc_hashkey;
+	struct task		lsc_qtask;
+	struct ifqueue		lsc_queue;	/* pdu input queue */
+};
+
+#define	LACP_TYPE_ACTORINFO	1
+#define	LACP_TYPE_PARTNERINFO	2
+#define	LACP_TYPE_COLLECTORINFO	3
+
+/* timeout values (in sec) */
+#define	LACP_FAST_PERIODIC_TIME		(1)
+#define	LACP_SLOW_PERIODIC_TIME		(30)
+#define	LACP_SHORT_TIMEOUT_TIME		(3 * LACP_FAST_PERIODIC_TIME)
+#define	LACP_LONG_TIMEOUT_TIME		(3 * LACP_SLOW_PERIODIC_TIME)
+#define	LACP_CHURN_DETECTION_TIME	(60)
+#define	LACP_AGGREGATE_WAIT_TIME	(2)
+#define	LACP_TRANSIT_DELAY		3000	/* in msec */
+
+#define	LACP_STATE_EQ(s1, s2, mask)	\
+	((((s1) ^ (s2)) & (mask)) == 0)
+
+#define	LACP_SYS_PRI(peer)	(peer).lip_systemid.lsi_prio
+
+#define	LACP_PORT(_lp)	((struct lacp_port *)(_lp)->lp_psc)
+#define	LACP_SOFTC(_sc)	((struct lacp_softc *)(_sc)->sc_psc)
+
+void		lacp_input(struct lagg_port *, struct mbuf *);
+struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
+int		lacp_attach(struct lagg_softc *);
+int		lacp_detach(struct lagg_softc *);
+void		lacp_init(struct lagg_softc *);
+void		lacp_stop(struct lagg_softc *);
+int		lacp_port_create(struct lagg_port *);
+void		lacp_port_destroy(struct lagg_port *);
+void		lacp_linkstate(struct lagg_port *);
+int		lacp_port_isactive(struct lagg_port *);
+void		lacp_req(struct lagg_softc *, caddr_t);
+void		lacp_portreq(struct lagg_port *, caddr_t);
+
+/* following constants don't include terminating NUL */
+#define	LACP_MACSTR_MAX		(2*6 + 5)
+#define	LACP_SYSTEMPRIOSTR_MAX	(4)
+#define	LACP_SYSTEMIDSTR_MAX	(LACP_SYSTEMPRIOSTR_MAX + 1 + LACP_MACSTR_MAX)
+#define	LACP_PORTPRIOSTR_MAX	(4)
+#define	LACP_PORTNOSTR_MAX	(4)
+#define	LACP_PORTIDSTR_MAX	(LACP_PORTPRIOSTR_MAX + 1 + LACP_PORTNOSTR_MAX)
+#define	LACP_KEYSTR_MAX		(4)
+#define	LACP_PARTNERSTR_MAX	\
+	(1 + LACP_SYSTEMIDSTR_MAX + 1 + LACP_KEYSTR_MAX + 1 \
+	+ LACP_PORTIDSTR_MAX + 1)
+#define	LACP_LAGIDSTR_MAX	\
+	(1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
+#define	LACP_STATESTR_MAX	(255) /* XXX */
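
The TLV_SET() macro above stamps the type/length header of a TLV whose value follows it in memory, which is how the fixed-layout struct lacpdu would be filled before transmission. A hedged sketch of that use, assuming a zeroed PDU and that 1 is the LACP version number carried in the slow-protocols header:

	struct lacpdu du;

	memset(&du, 0, sizeof(du));
	du.ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
	du.ldu_sph.sph_version = 1;

	/* tlv_length becomes sizeof(struct tlvhdr) + value length,
	 * e.g. 2 + 18 = 20 for the actor/partner information TLVs */
	TLV_SET(&du.ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du.ldu_actor));
	TLV_SET(&du.ldu_tlv_partner, LACP_TYPE_PARTNERINFO, sizeof(du.ldu_partner));
	TLV_SET(&du.ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
	    sizeof(du.ldu_collector));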
--- /dev/null
+++ sys/net/if_lagg.c
@@ -0,0 +1,1719 @@
+/*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk at openbsd.org>
+ * Copyright (c) 2007 Andrew Thompson <thompsa at FreeBSD.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/net/if_lagg.c,v 1.19.2.5 2007/12/21 05:33:48 thompsa Exp $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/hash.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/taskqueue.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/bpf.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/if_vlan_var.h>
+#include <net/if_lagg.h>
+#include <net/ieee8023ad_lacp.h>
+
+/* Special flags we should propagate to the lagg ports. */
+static struct {
+	int flag;
+	int (*func)(struct ifnet *, int);
+} lagg_pflags[] = {
+	{IFF_PROMISC, ifpromisc},
+	{IFF_ALLMULTI, if_allmulti},
+	{0, NULL}
+};
+
+SLIST_HEAD(__trhead, lagg_softc) lagg_list;	/* list of laggs */
+static struct mtx	lagg_list_mtx;
+eventhandler_tag	lagg_detach_cookie = NULL;
+
+static int	lagg_clone_create(struct if_clone *, int, caddr_t);
+static void	lagg_clone_destroy(struct ifnet *);
+static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
+static void	lagg_capabilities(struct lagg_softc *);
+static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
+static void	lagg_port_setlladdr(void *, int);
+static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
+static int	lagg_port_destroy(struct lagg_port *, int);
+static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
+static void	lagg_linkstate(struct lagg_softc *);
+static void	lagg_port_state(struct ifnet *, int);
+static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
+static int	lagg_port_output(struct ifnet *, struct mbuf *,
+		    struct sockaddr *, struct rtentry *);
+static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
+static int	lagg_port_checkstacking(struct lagg_softc *);
+static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
+static void	lagg_init(void *);
+static void	lagg_stop(struct lagg_softc *);
+static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
+static int	lagg_ether_setmulti(struct lagg_softc *);
+static int	lagg_ether_cmdmulti(struct lagg_port *, int);
+static	int	lagg_setflag(struct lagg_port *, int, int,
+		    int (*func)(struct ifnet *, int));
+static	int	lagg_setflags(struct lagg_port *, int status);
+static void	lagg_start(struct ifnet *);
+static int	lagg_media_change(struct ifnet *);
+static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
+static struct lagg_port *lagg_link_active(struct lagg_softc *,
+	    struct lagg_port *);
+static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
+
+IFC_SIMPLE_DECLARE(lagg, 0);
+
+/* Simple round robin */
+static int	lagg_rr_attach(struct lagg_softc *);
+static int	lagg_rr_detach(struct lagg_softc *);
+static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
+		    struct mbuf *);
+
+/* Active failover */
+static int	lagg_fail_attach(struct lagg_softc *);
+static int	lagg_fail_detach(struct lagg_softc *);
+static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
+		    struct mbuf *);
+
+/* Loadbalancing */
+static int	lagg_lb_attach(struct lagg_softc *);
+static int	lagg_lb_detach(struct lagg_softc *);
+static int	lagg_lb_port_create(struct lagg_port *);
+static void	lagg_lb_port_destroy(struct lagg_port *);
+static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
+		    struct mbuf *);
+static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
+
+/* 802.3ad LACP */
+static int	lagg_lacp_attach(struct lagg_softc *);
+static int	lagg_lacp_detach(struct lagg_softc *);
+static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
+		    struct mbuf *);
+static void	lagg_lacp_lladdr(struct lagg_softc *);
+
+/* lagg protocol table */
+static const struct {
+	int			ti_proto;
+	int			(*ti_attach)(struct lagg_softc *);
+} lagg_protos[] = {
+	{ LAGG_PROTO_ROUNDROBIN,	lagg_rr_attach },
+	{ LAGG_PROTO_FAILOVER,		lagg_fail_attach },
+	{ LAGG_PROTO_LOADBALANCE,	lagg_lb_attach },
+	{ LAGG_PROTO_ETHERCHANNEL,	lagg_lb_attach },
+	{ LAGG_PROTO_LACP,		lagg_lacp_attach },
+	{ LAGG_PROTO_NONE,		NULL }
+};
+
+static int
+lagg_modevent(module_t mod, int type, void *data)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
+		SLIST_INIT(&lagg_list);
+		if_clone_attach(&lagg_cloner);
+		lagg_input_p = lagg_input;
+		lagg_linkstate_p = lagg_port_state;
+		lagg_detach_cookie = EVENTHANDLER_REGISTER(
+		    ifnet_departure_event, lagg_port_ifdetach, NULL,
+		    EVENTHANDLER_PRI_ANY);
+		break;
+	case MOD_UNLOAD:
+		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+		    lagg_detach_cookie);
+		if_clone_detach(&lagg_cloner);
+		lagg_input_p = NULL;
+		lagg_linkstate_p = NULL;
+		mtx_destroy(&lagg_list_mtx);
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t lagg_mod = {
+	"if_lagg",
+	lagg_modevent,
+	0
+};
+
+DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+
+static int
+lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct lagg_softc *sc;
+	struct ifnet *ifp;
+	int i, error = 0;
+	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		free(sc, M_DEVBUF);
+		return (ENOSPC);
+	}
+
+	sc->sc_proto = LAGG_PROTO_NONE;
+	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
+		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
+			sc->sc_proto = lagg_protos[i].ti_proto;
+			if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
+				if_free_type(ifp, IFT_ETHER);
+				free(sc, M_DEVBUF);
+				return (error);
+			}
+			break;
+		}
+	}
+	LAGG_LOCK_INIT(sc);
+	SLIST_INIT(&sc->sc_ports);
+	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
+
+	/* Initialise pseudo media types */
+	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
+	    lagg_media_status);
+	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+
+	if_initname(ifp, ifc->ifc_name, unit);
+	ifp->if_type = IFT_ETHER;
+	ifp->if_softc = sc;
+	ifp->if_start = lagg_start;
+	ifp->if_init = lagg_init;
+	ifp->if_ioctl = lagg_ioctl;
+	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+
+	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
+	IFQ_SET_READY(&ifp->if_snd);
+
+	/*
+	 * Attach as an ordinary ethernet device; child ports will be
+	 * attached as the special device type IFT_IEEE8023ADLAG.
+	 */
+	ether_ifattach(ifp, eaddr);
+
+	/* Insert into the global list of laggs */
+	mtx_lock(&lagg_list_mtx);
+	SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
+	mtx_unlock(&lagg_list_mtx);
+
+	return (0);
+}
+
+static void
+lagg_clone_destroy(struct ifnet *ifp)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	struct lagg_port *lp;
+
+	LAGG_WLOCK(sc);
+
+	lagg_stop(sc);
+	ifp->if_flags &= ~IFF_UP;
+
+	/* Shutdown and remove lagg ports */
+	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
+		lagg_port_destroy(lp, 1);
+	/* Unhook the aggregation protocol */
+	(*sc->sc_detach)(sc);
+
+	LAGG_WUNLOCK(sc);
+
+	ifmedia_removeall(&sc->sc_media);
+	ether_ifdetach(ifp);
+	if_free_type(ifp, IFT_ETHER);
+
+	mtx_lock(&lagg_list_mtx);
+	SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
+	mtx_unlock(&lagg_list_mtx);
+
+	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
+	LAGG_LOCK_DESTROY(sc);
+	free(sc, M_DEVBUF);
+}
+
+static void
+lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
+{
+	struct ifnet *ifp = sc->sc_ifp;
+
+	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+		return;
+
+	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+	/* Let the protocol know the MAC has changed */
+	if (sc->sc_lladdr != NULL)
+		(*sc->sc_lladdr)(sc);
+}
+
+static void
+lagg_capabilities(struct lagg_softc *sc)
+{
+	struct lagg_port *lp;
+	int cap = ~0, ena = ~0;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	/* Get capabilities from the lagg ports */
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+		cap &= lp->lp_ifp->if_capabilities;
+		ena &= lp->lp_ifp->if_capenable;
+	}
+	cap = (cap == ~0 ? 0 : cap);
+	ena = (ena == ~0 ? 0 : ena);
+
+	if (sc->sc_ifp->if_capabilities != cap ||
+	    sc->sc_ifp->if_capenable != ena) {
+		sc->sc_ifp->if_capabilities = cap;
+		sc->sc_ifp->if_capenable = ena;
+		getmicrotime(&sc->sc_ifp->if_lastchange);
+
+		if (sc->sc_ifflags & IFF_DEBUG)
+			if_printf(sc->sc_ifp,
+			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
+	}
+}
+
+static void
+lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
+{
+	struct lagg_softc *sc = lp->lp_softc;
+	struct ifnet *ifp = lp->lp_ifp;
+	struct lagg_llq *llq;
+	int pending = 0;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	if (lp->lp_detaching ||
+	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+		return;
+
+	/* Check to make sure it's not already queued to be changed */
+	SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
+		if (llq->llq_ifp == ifp) {
+			pending = 1;
+			break;
+		}
+	}
+
+	if (!pending) {
+		llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
+		if (llq == NULL)	/* XXX what to do */
+			return;
+	}
+
+	/* Update the lladdr even if pending; it may have changed */
+	llq->llq_ifp = ifp;
+	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
+
+	if (!pending)
+		SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
+
+	taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
+}
+
+/*
+ * Set the interface MAC address from a taskqueue to avoid a LOR.
+ */
+static void
+lagg_port_setlladdr(void *arg, int pending)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)arg;
+	struct lagg_llq *llq, *head;
+	struct ifnet *ifp;
+	int error;
+
+	/* Grab a local reference of the queue and remove it from the softc */
+	LAGG_WLOCK(sc);
+	head = SLIST_FIRST(&sc->sc_llq_head);
+	SLIST_FIRST(&sc->sc_llq_head) = NULL;
+	LAGG_WUNLOCK(sc);
+
+	/*
+	 * Traverse the queue and set the lladdr on each ifp. It is safe to do
+	 * unlocked as we have the only reference to it.
+	 */
+	for (llq = head; llq != NULL; llq = head) {
+		ifp = llq->llq_ifp;
+
+		/* Set the link layer address */
+		error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
+		if (error)
+			printf("%s: setlladdr failed on %s\n", __func__,
+			    ifp->if_xname);
+
+		head = SLIST_NEXT(llq, llq_entries);
+		free(llq, M_DEVBUF);
+	}
+}
+
+static int
+lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
+{
+	struct lagg_softc *sc_ptr;
+	struct lagg_port *lp;
+	int error = 0;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	/* Limit the maximal number of lagg ports */
+	if (sc->sc_count >= LAGG_MAX_PORTS)
+		return (ENOSPC);
+
+	/* New lagg port has to be in an idle state */
+	if (ifp->if_drv_flags & IFF_DRV_OACTIVE)
+		return (EBUSY);
+
+	/* Check if port has already been associated to a lagg */
+	if (ifp->if_lagg != NULL)
+		return (EBUSY);
+
+	/* XXX Disallow non-ethernet interfaces (this should allow any 802 media) */
+	if (ifp->if_type != IFT_ETHER)
+		return (EPROTONOSUPPORT);
+
+	/* Allow the first Ethernet member to define the MTU */
+	if (SLIST_EMPTY(&sc->sc_ports))
+		sc->sc_ifp->if_mtu = ifp->if_mtu;
+	else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
+		if_printf(sc->sc_ifp, "invalid MTU for %s\n",
+		    ifp->if_xname);
+		return (EINVAL);
+	}
+
+	if ((lp = malloc(sizeof(struct lagg_port),
+	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+		return (ENOMEM);
+
+	/* Check if port is a stacked lagg */
+	mtx_lock(&lagg_list_mtx);
+	SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
+		if (ifp == sc_ptr->sc_ifp) {
+			mtx_unlock(&lagg_list_mtx);
+			free(lp, M_DEVBUF);
+			return (EINVAL);
+			/* XXX disable stacking for the moment, it's untested
+			lp->lp_flags |= LAGG_PORT_STACK;
+			if (lagg_port_checkstacking(sc_ptr) >=
+			    LAGG_MAX_STACKING) {
+				mtx_unlock(&lagg_list_mtx);
+				free(lp, M_DEVBUF);
+				return (E2BIG);
+			}
+			*/
+		}
+	}
+	mtx_unlock(&lagg_list_mtx);
+
+	/* Change the interface type */
+	lp->lp_iftype = ifp->if_type;
+	ifp->if_type = IFT_IEEE8023ADLAG;
+	ifp->if_lagg = lp;
+	lp->lp_ioctl = ifp->if_ioctl;
+	ifp->if_ioctl = lagg_port_ioctl;
+	lp->lp_output = ifp->if_output;
+	ifp->if_output = lagg_port_output;
+
+	lp->lp_ifp = ifp;
+	lp->lp_softc = sc;
+
+	/* Save port link layer address */
+	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
+
+	if (SLIST_EMPTY(&sc->sc_ports)) {
+		sc->sc_primary = lp;
+		lagg_lladdr(sc, IF_LLADDR(ifp));
+	} else {
+		/* Update link layer address for this port */
+		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
+	}
+
+	/* Insert into the list of ports */
+	SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
+	sc->sc_count++;
+
+	/* Update lagg capabilities */
+	lagg_capabilities(sc);
+	lagg_linkstate(sc);
+
+	/* Add multicast addresses and interface flags to this port */
+	lagg_ether_cmdmulti(lp, 1);
+	lagg_setflags(lp, 1);
+
+	if (sc->sc_port_create != NULL)
+		error = (*sc->sc_port_create)(lp);
+	if (error) {
+		/* remove the port again, without calling sc_port_destroy */
+		lagg_port_destroy(lp, 0);
+		return (error);
+	}
+
+	return (error);
+}
+
+static int
+lagg_port_checkstacking(struct lagg_softc *sc)
+{
+	struct lagg_softc *sc_ptr;
+	struct lagg_port *lp;
+	int m = 0;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+		if (lp->lp_flags & LAGG_PORT_STACK) {
+			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
+			m = MAX(m, lagg_port_checkstacking(sc_ptr));
+		}
+	}
+
+	return (m + 1);
+}
+
+static int
+lagg_port_destroy(struct lagg_port *lp, int runpd)
+{
+	struct lagg_softc *sc = lp->lp_softc;
+	struct lagg_port *lp_ptr;
+	struct lagg_llq *llq;
+	struct ifnet *ifp = lp->lp_ifp;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	if (runpd && sc->sc_port_destroy != NULL)
+		(*sc->sc_port_destroy)(lp);
+
+	/*
+	 * Remove multicast addresses and interface flags from this port and
+	 * reset the MAC address; skip this if the interface is being detached.
+	 */
+	if (!lp->lp_detaching) {
+		lagg_ether_cmdmulti(lp, 0);
+		lagg_setflags(lp, 0);
+		lagg_port_lladdr(lp, lp->lp_lladdr);
+	}
+
+	/* Restore interface */
+	ifp->if_type = lp->lp_iftype;
+	ifp->if_ioctl = lp->lp_ioctl;
+	ifp->if_output = lp->lp_output;
+	ifp->if_lagg = NULL;
+
+	/* Finally, remove the port from the lagg */
+	SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
+	sc->sc_count--;
+
+	/* Update the primary interface */
+	if (lp == sc->sc_primary) {
+		uint8_t lladdr[ETHER_ADDR_LEN];
+
+		if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
+			bzero(&lladdr, ETHER_ADDR_LEN);
+		} else {
+			bcopy(lp_ptr->lp_lladdr,
+			    lladdr, ETHER_ADDR_LEN);
+		}
+		lagg_lladdr(sc, lladdr);
+		sc->sc_primary = lp_ptr;
+
+		/* Update link layer address for each port */
+		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
+			lagg_port_lladdr(lp_ptr, lladdr);
+	}
+
+	/* Remove any pending lladdr changes from the queue */
+	if (lp->lp_detaching) {
+		SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
+			if (llq->llq_ifp == ifp) {
+				SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
+				    llq_entries);
+				free(llq, M_DEVBUF);
+				break;	/* Only appears once */
+			}
+		}
+	}
+
+	if (lp->lp_ifflags)
+		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
+
+	free(lp, M_DEVBUF);
+
+	/* Update lagg capabilities */
+	lagg_capabilities(sc);
+	lagg_linkstate(sc);
+
+	return (0);
+}
+
+static int
+lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct lagg_reqport *rp = (struct lagg_reqport *)data;
+	struct lagg_softc *sc;
+	struct lagg_port *lp = NULL;
+	int error = 0;
+
+	/* Should be checked by the caller */
+	if (ifp->if_type != IFT_IEEE8023ADLAG ||
+	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+		goto fallback;
+
+	switch (cmd) {
+	case SIOCGLAGGPORT:
+		if (rp->rp_portname[0] == '\0' ||
+		    ifunit(rp->rp_portname) != ifp) {
+			error = EINVAL;
+			break;
+		}
+
+		LAGG_RLOCK(sc);
+		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
+			error = ENOENT;
+			LAGG_RUNLOCK(sc);
+			break;
+		}
+
+		lagg_port2req(lp, rp);
+		LAGG_RUNLOCK(sc);
+		break;
+
+	case SIOCSIFCAP:
+		if (lp->lp_ioctl == NULL) {
+			error = EINVAL;
+			break;
+		}
+		error = (*lp->lp_ioctl)(ifp, cmd, data);
+		if (error)
+			break;
+
+		/* Update lagg interface capabilities */
+		LAGG_WLOCK(sc);
+		lagg_capabilities(sc);
+		LAGG_WUNLOCK(sc);
+		break;
+
+	case SIOCSIFMTU:
+		/* Do not allow the MTU to be changed once joined */
+		error = EINVAL;
+		break;
+
+	default:
+		goto fallback;
+	}
+
+	return (error);
+
+fallback:
+	if (lp->lp_ioctl != NULL)
+		return ((*lp->lp_ioctl)(ifp, cmd, data));
+
+	return (EINVAL);
+}
+
+static int
+lagg_port_output(struct ifnet *ifp, struct mbuf *m,
+	struct sockaddr *dst, struct rtentry *rt0)
+{
+	struct lagg_port *lp = ifp->if_lagg;
+	struct ether_header *eh;
+	short type = 0;
+
+	switch (dst->sa_family) {
+		case pseudo_AF_HDRCMPLT:
+		case AF_UNSPEC:
+			eh = (struct ether_header *)dst->sa_data;
+			type = eh->ether_type;
+			break;
+	}
+
+	/*
+	 * Only allow ethernet types required to initiate or maintain the
+	 * link; aggregated frames take a different path.
+	 */
+	switch (ntohs(type)) {
+		case ETHERTYPE_PAE:	/* EAPOL PAE/802.1x */
+			return ((*lp->lp_output)(ifp, m, dst, rt0));
+	}
+
+	/* drop any other frames */
+	m_freem(m);
+	return (EBUSY);
+}
+
+static void
+lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct lagg_port *lp;
+	struct lagg_softc *sc;
+
+	if ((lp = ifp->if_lagg) == NULL)
+		return;
+
+	sc = lp->lp_softc;
+
+	LAGG_WLOCK(sc);
+	lp->lp_detaching = 1;
+	lagg_port_destroy(lp, 1);
+	LAGG_WUNLOCK(sc);
+}
+
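+/*
+ * Export the state of a lagg port into a lagg_reqport structure for the
+ * SIOCGLAGG and SIOCGLAGGPORT ioctls.
+ */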
+static void
+lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
+{
+	struct lagg_softc *sc = lp->lp_softc;
+
+	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
+	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
+	rp->rp_prio = lp->lp_prio;
+	rp->rp_flags = lp->lp_flags;
+	if (sc->sc_portreq != NULL)
+		(*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
+
+	/* Add protocol specific flags */
+	switch (sc->sc_proto) {
+		case LAGG_PROTO_FAILOVER:
+			if (lp == sc->sc_primary)
+				rp->rp_flags |= LAGG_PORT_MASTER;
+			if (lp == lagg_link_active(sc, sc->sc_primary))
+				rp->rp_flags |= LAGG_PORT_ACTIVE;
+			break;
+
+		case LAGG_PROTO_ROUNDROBIN:
+		case LAGG_PROTO_LOADBALANCE:
+		case LAGG_PROTO_ETHERCHANNEL:
+			if (LAGG_PORTACTIVE(lp))
+				rp->rp_flags |= LAGG_PORT_ACTIVE;
+			break;
+
+		case LAGG_PROTO_LACP:
+			/* LACP has a different definition of active */
+			if (lacp_port_isactive(lp))
+				rp->rp_flags |= LAGG_PORT_ACTIVE;
+			break;
+	}
+
+}
+
+static void
+lagg_init(void *xsc)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)xsc;
+	struct lagg_port *lp;
+	struct ifnet *ifp = sc->sc_ifp;
+
+	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+		return;
+
+	LAGG_WLOCK(sc);
+
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	/* Update the port lladdrs */
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+		lagg_port_lladdr(lp, IF_LLADDR(ifp));
+
+	if (sc->sc_init != NULL)
+		(*sc->sc_init)(sc);
+
+	LAGG_WUNLOCK(sc);
+}
+
+static void
+lagg_stop(struct lagg_softc *sc)
+{
+	struct ifnet *ifp = sc->sc_ifp;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return;
+
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+	if (sc->sc_stop != NULL)
+		(*sc->sc_stop)(sc);
+}
+
+static int
+lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	struct lagg_reqall *ra = (struct lagg_reqall *)data;
+	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct lagg_port *lp;
+	struct ifnet *tpif;
+	struct thread *td = curthread;
+	char *buf, *outbuf;
+	int count, buflen, len, error = 0;
+
+	bzero(&rpbuf, sizeof(rpbuf));
+
+	switch (cmd) {
+	case SIOCGLAGG:
+		LAGG_RLOCK(sc);
+		count = 0;
+		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+			count++;
+		buflen = count * sizeof(struct lagg_reqport);
+		LAGG_RUNLOCK(sc);
+
+		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+
+		LAGG_RLOCK(sc);
+		ra->ra_proto = sc->sc_proto;
+		if (sc->sc_req != NULL)
+			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
+
+		count = 0;
+		buf = outbuf;
+		len = min(ra->ra_size, buflen);
+		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+			if (len < sizeof(rpbuf))
+				break;
+
+			lagg_port2req(lp, &rpbuf);
+			memcpy(buf, &rpbuf, sizeof(rpbuf));
+			count++;
+			buf += sizeof(rpbuf);
+			len -= sizeof(rpbuf);
+		}
+		LAGG_RUNLOCK(sc);
+		ra->ra_ports = count;
+		ra->ra_size = count * sizeof(rpbuf);
+		error = copyout(outbuf, ra->ra_port, ra->ra_size);
+		free(outbuf, M_TEMP);
+		break;
+	case SIOCSLAGG:
+		error = priv_check(td, PRIV_NET_LAGG);
+		if (error)
+			break;
+		if (ra->ra_proto >= LAGG_PROTO_MAX) {
+			error = EPROTONOSUPPORT;
+			break;
+		}
+		if (sc->sc_proto != LAGG_PROTO_NONE) {
+			LAGG_WLOCK(sc);
+			error = sc->sc_detach(sc);
+			/* Reset protocol and pointers */
+			sc->sc_proto = LAGG_PROTO_NONE;
+			sc->sc_detach = NULL;
+			sc->sc_start = NULL;
+			sc->sc_input = NULL;
+			sc->sc_port_create = NULL;
+			sc->sc_port_destroy = NULL;
+			sc->sc_linkstate = NULL;
+			sc->sc_init = NULL;
+			sc->sc_stop = NULL;
+			sc->sc_lladdr = NULL;
+			sc->sc_req = NULL;
+			sc->sc_portreq = NULL;
+			LAGG_WUNLOCK(sc);
+		}
+		if (error != 0)
+			break;
+		for (int i = 0; i < (sizeof(lagg_protos) /
+		    sizeof(lagg_protos[0])); i++) {
+			if (lagg_protos[i].ti_proto == ra->ra_proto) {
+				if (sc->sc_ifflags & IFF_DEBUG)
+					printf("%s: using proto %u\n",
+					    sc->sc_ifname,
+					    lagg_protos[i].ti_proto);
+				LAGG_WLOCK(sc);
+				sc->sc_proto = lagg_protos[i].ti_proto;
+				if (sc->sc_proto != LAGG_PROTO_NONE)
+					error = lagg_protos[i].ti_attach(sc);
+				LAGG_WUNLOCK(sc);
+				return (error);
+			}
+		}
+		error = EPROTONOSUPPORT;
+		break;
+	case SIOCGLAGGPORT:
+		if (rp->rp_portname[0] == '\0' ||
+		    (tpif = ifunit(rp->rp_portname)) == NULL) {
+			error = EINVAL;
+			break;
+		}
+
+		LAGG_RLOCK(sc);
+		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
+		    lp->lp_softc != sc) {
+			error = ENOENT;
+			LAGG_RUNLOCK(sc);
+			break;
+		}
+
+		lagg_port2req(lp, rp);
+		LAGG_RUNLOCK(sc);
+		break;
+	case SIOCSLAGGPORT:
+		error = priv_check(td, PRIV_NET_LAGG);
+		if (error)
+			break;
+		if (rp->rp_portname[0] == '\0' ||
+		    (tpif = ifunit(rp->rp_portname)) == NULL) {
+			error = EINVAL;
+			break;
+		}
+		LAGG_WLOCK(sc);
+		error = lagg_port_create(sc, tpif);
+		LAGG_WUNLOCK(sc);
+		break;
+	case SIOCSLAGGDELPORT:
+		error = priv_check(td, PRIV_NET_LAGG);
+		if (error)
+			break;
+		if (rp->rp_portname[0] == '\0' ||
+		    (tpif = ifunit(rp->rp_portname)) == NULL) {
+			error = EINVAL;
+			break;
+		}
+
+		LAGG_WLOCK(sc);
+		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
+		    lp->lp_softc != sc) {
+			error = ENOENT;
+			LAGG_WUNLOCK(sc);
+			break;
+		}
+
+		error = lagg_port_destroy(lp, 1);
+		LAGG_WUNLOCK(sc);
+		break;
+	case SIOCSIFFLAGS:
+		/* Set flags on ports too */
+		LAGG_WLOCK(sc);
+		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+			lagg_setflags(lp, 1);
+		}
+		LAGG_WUNLOCK(sc);
+
+		if (!(ifp->if_flags & IFF_UP) &&
+		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+			/*
+			 * If interface is marked down and it is running,
+			 * then stop and disable it.
+			 */
+			LAGG_WLOCK(sc);
+			lagg_stop(sc);
+			LAGG_WUNLOCK(sc);
+		} else if ((ifp->if_flags & IFF_UP) &&
+		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+			/*
+			 * If interface is marked up and it is stopped, then
+			 * start it.
+			 */
+			(*ifp->if_init)(sc);
+		}
+		break;
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		LAGG_WLOCK(sc);
+		error = lagg_ether_setmulti(sc);
+		LAGG_WUNLOCK(sc);
+		break;
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+		break;
+
+	case SIOCSIFCAP:
+	case SIOCSIFMTU:
+		/* Do not allow the MTU or caps to be directly changed */
+		error = EINVAL;
+		break;
+
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+	return (error);
+}
+
+static int
+lagg_ether_setmulti(struct lagg_softc *sc)
+{
+	struct lagg_port *lp;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+		/* First, remove any existing filter entries. */
+		lagg_ether_cmdmulti(lp, 0);
+		/* copy all addresses from the lagg interface to the port */
+		lagg_ether_cmdmulti(lp, 1);
+	}
+	return (0);
+}
+
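+/*
+ * Add (set != 0) or remove (set == 0) the lagg interface's link-layer
+ * multicast memberships on a member port, tracking them in lp_mc_head.
+ */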
+static int
+lagg_ether_cmdmulti(struct lagg_port *lp, int set)
+{
+	struct lagg_softc *sc = lp->lp_softc;
+	struct ifnet *ifp = lp->lp_ifp;
+	struct ifnet *scifp = sc->sc_ifp;
+	struct lagg_mc *mc;
+	struct ifmultiaddr *ifma, *rifma = NULL;
+	struct sockaddr_dl sdl;
+	int error;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	bzero((char *)&sdl, sizeof(sdl));
+	sdl.sdl_len = sizeof(sdl);
+	sdl.sdl_family = AF_LINK;
+	sdl.sdl_type = IFT_ETHER;
+	sdl.sdl_alen = ETHER_ADDR_LEN;
+	sdl.sdl_index = ifp->if_index;
+
+	if (set) {
+		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
+			if (ifma->ifma_addr->sa_family != AF_LINK)
+				continue;
+			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+			    LLADDR(&sdl), ETHER_ADDR_LEN);
+
+			error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
+			if (error)
+				return (error);
+			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
+			if (mc == NULL)
+				return (ENOMEM);
+			mc->mc_ifma = rifma;
+			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
+		}
+	} else {
+		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
+			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
+			if_delmulti_ifma(mc->mc_ifma);
+			free(mc, M_DEVBUF);
+		}
+	}
+	return (0);
+}
+
+/* Handle a ref counted flag that should be set on the lagg port as well */
+static int
+lagg_setflag(struct lagg_port *lp, int flag, int status,
+	     int (*func)(struct ifnet *, int))
+{
+	struct lagg_softc *sc = lp->lp_softc;
+	struct ifnet *scifp = sc->sc_ifp;
+	struct ifnet *ifp = lp->lp_ifp;
+	int error;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	status = status ? (scifp->if_flags & flag) : 0;
+	/* Now "status" contains the flag value or 0 */
+
+	/*
+	 * See if the recorded port status differs from what we want it
+	 * to be.  If it does, flip it.  We record the port status in
+	 * lp_ifflags so that we won't clear a port flag we haven't set.
+	 * In fact, we don't clear or set port flags directly, but get or
+	 * release references to them.  That's why we can be sure that the
+	 * recorded flags still agree with the actual port flags.
+	 */
+	if (status != (lp->lp_ifflags & flag)) {
+		error = (*func)(ifp, status);
+		if (error)
+			return (error);
+		lp->lp_ifflags &= ~flag;
+		lp->lp_ifflags |= status;
+	}
+	return (0);
+}
+
+/*
+ * Handle IFF_* flags that require certain changes on the lagg port:
+ * if "status" is true, update the port's flags to match the lagg's;
+ * if "status" is false, forcibly clear the flags set on the port.
+ */
+static int
+lagg_setflags(struct lagg_port *lp, int status)
+{
+	int error, i;
+
+	for (i = 0; lagg_pflags[i].flag; i++) {
+		error = lagg_setflag(lp, lagg_pflags[i].flag,
+		    status, lagg_pflags[i].func);
+		if (error)
+			return (error);
+	}
+	return (0);
+}
+
+static void
+lagg_start(struct ifnet *ifp)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	struct mbuf *m;
+	int error = 0;
+
+	LAGG_RLOCK(sc);
+	for (;; error = 0) {
+		IFQ_DEQUEUE(&ifp->if_snd, m);
+		if (m == NULL)
+			break;
+
+		ETHER_BPF_MTAP(ifp, m);
+
+		if (sc->sc_proto != LAGG_PROTO_NONE)
+			error = (*sc->sc_start)(sc, m);
+		else
+			m_freem(m);
+
+		if (error == 0)
+			ifp->if_opackets++;
+		else
+			ifp->if_oerrors++;
+	}
+	LAGG_RUNLOCK(sc);
+
+	return;
+}
+
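+/*
+ * Receive hook installed via lagg_input_p for frames arriving on member
+ * ports.  Returns the mbuf for further processing as if received on the
+ * lagg interface, or NULL if it was consumed or dropped.
+ */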
+static struct mbuf *
+lagg_input(struct ifnet *ifp, struct mbuf *m)
+{
+	struct lagg_port *lp = ifp->if_lagg;
+	struct lagg_softc *sc = lp->lp_softc;
+	struct ifnet *scifp = sc->sc_ifp;
+
+	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
+	    sc->sc_proto == LAGG_PROTO_NONE) {
+		m_freem(m);
+		return (NULL);
+	}
+
+	LAGG_RLOCK(sc);
+	ETHER_BPF_MTAP(scifp, m);
+
+	m = (*sc->sc_input)(sc, lp, m);
+
+	if (m != NULL) {
+		scifp->if_ipackets++;
+		scifp->if_ibytes += m->m_pkthdr.len;
+
+		if (scifp->if_flags & IFF_MONITOR) {
+			m_freem(m);
+			m = NULL;
+		}
+	}
+
+	LAGG_RUNLOCK(sc);
+	return (m);
+}
+
+static int
+lagg_media_change(struct ifnet *ifp)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+
+	if (sc->sc_ifflags & IFF_DEBUG)
+		printf("%s\n", __func__);
+
+	/* Ignore */
+	return (0);
+}
+
+static void
+lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	struct lagg_port *lp;
+
+	imr->ifm_status = IFM_AVALID;
+	imr->ifm_active = IFM_ETHER | IFM_AUTO;
+
+	LAGG_RLOCK(sc);
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+		if (LAGG_PORTACTIVE(lp))
+			imr->ifm_status |= IFM_ACTIVE;
+	}
+	LAGG_RUNLOCK(sc);
+}
+
+static void
+lagg_linkstate(struct lagg_softc *sc)
+{
+	struct lagg_port *lp;
+	int new_link = LINK_STATE_DOWN;
+
+	/* Our link is considered up if at least one of our ports is active */
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+		if (lp->lp_link_state == LINK_STATE_UP) {
+			new_link = LINK_STATE_UP;
+			break;
+		}
+	}
+	if_link_state_change(sc->sc_ifp, new_link);
+}
+
+static void
+lagg_port_state(struct ifnet *ifp, int state)
+{
+	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
+	struct lagg_softc *sc = NULL;
+
+	if (lp != NULL)
+		sc = lp->lp_softc;
+	if (sc == NULL)
+		return;
+
+	LAGG_WLOCK(sc);
+	lagg_linkstate(sc);
+	if (sc->sc_linkstate != NULL)
+		(*sc->sc_linkstate)(lp);
+	LAGG_WUNLOCK(sc);
+}
+
+struct lagg_port *
+lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
+{
+	struct lagg_port *lp_next, *rval = NULL;
+	// int new_link = LINK_STATE_DOWN;
+
+	LAGG_RLOCK_ASSERT(sc);
+	/*
+	 * Search a port which reports an active link state.
+	 */
+
+	if (lp == NULL)
+		goto search;
+	if (LAGG_PORTACTIVE(lp)) {
+		rval = lp;
+		goto found;
+	}
+	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
+	    LAGG_PORTACTIVE(lp_next)) {
+		rval = lp_next;
+		goto found;
+	}
+
+search:
+	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
+		if (LAGG_PORTACTIVE(lp_next)) {
+			rval = lp_next;
+			goto found;
+		}
+	}
+
+found:
+	if (rval != NULL) {
+		/*
+		 * The IEEE 802.1D standard assumes that a lagg with
+		 * multiple ports is always full duplex. This is valid
+		 * for load sharing laggs and if at least two links
+		 * are active. Unfortunately, checking the latter would
+		 * be too expensive at this point.
+		 XXX
+		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
+		    (sc->sc_count > 1))
+			new_link = LINK_STATE_FULL_DUPLEX;
+		else
+			new_link = rval->lp_link_state;
+		 */
+	}
+
+	return (rval);
+}
+
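+/*
+ * Return a pointer to 'len' bytes at offset 'off' in the mbuf, copying
+ * them into 'buf' if they are not contiguous; NULL if the packet is too
+ * short.
+ */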
+static const void *
+lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
+{
+	if (m->m_pkthdr.len < (off + len)) {
+		return (NULL);
+	} else if (m->m_len < (off + len)) {
+		m_copydata(m, off, len, buf);
+		return (buf);
+	}
+	return (mtod(m, char *) + off);
+}
+
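+/*
+ * Compute a 32-bit flow hash over the Ethernet, VLAN and IPv4/IPv6
+ * headers of a frame, seeded with 'key'; used to pick a stable egress
+ * port for the load-balancing transmit path.
+ */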
+uint32_t
+lagg_hashmbuf(struct mbuf *m, uint32_t key)
+{
+	uint16_t etype;
+	uint32_t p = 0;
+	int off;
+	struct ether_header *eh;
+	struct ether_vlan_header vlanbuf;
+	const struct ether_vlan_header *vlan;
+#ifdef INET
+	const struct ip *ip;
+	struct ip ipbuf;
+#endif
+#ifdef INET6
+	const struct ip6_hdr *ip6;
+	struct ip6_hdr ip6buf;
+	uint32_t flow;
+#endif
+
+	off = sizeof(*eh);
+	if (m->m_len < off)
+		goto out;
+	eh = mtod(m, struct ether_header *);
+	etype = ntohs(eh->ether_type);
+	p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
+	p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+
+	/* Special handling for encapsulating VLAN frames */
+	if (m->m_flags & M_VLANTAG) {
+		p = hash32_buf(&m->m_pkthdr.ether_vtag,
+		    sizeof(m->m_pkthdr.ether_vtag), p);
+	} else if (etype == ETHERTYPE_VLAN) {
+		vlan = lagg_gethdr(m, off,  sizeof(*vlan), &vlanbuf);
+		if (vlan == NULL)
+			goto out;
+
+		p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+		etype = ntohs(vlan->evl_proto);
+		off += sizeof(*vlan) - sizeof(*eh);
+	}
+
+	switch (etype) {
+#ifdef INET
+	case ETHERTYPE_IP:
+		ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
+		if (ip == NULL)
+			goto out;
+
+		p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+		p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+		break;
+#endif
+#ifdef INET6
+	case ETHERTYPE_IPV6:
+		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
+		if (ip6 == NULL)
+			goto out;
+
+		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
+		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
+		break;
+#endif
+	}
+out:
+	return (p);
+}
+
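+/*
+ * Hand a frame to a member port's send queue; charge an output error to
+ * the member interface if the handoff fails.
+ */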
+int
+lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+	int error = 0;
+
+	IFQ_HANDOFF(ifp, m, error);
+	if (error)
+		ifp->if_oerrors++;
+	return (error);
+}
+
+/*
+ * Simple round robin aggregation
+ */
+
+static int
+lagg_rr_attach(struct lagg_softc *sc)
+{
+	sc->sc_detach = lagg_rr_detach;
+	sc->sc_start = lagg_rr_start;
+	sc->sc_input = lagg_rr_input;
+	sc->sc_port_create = NULL;
+	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
+	sc->sc_seq = 0;
+
+	return (0);
+}
+
+static int
+lagg_rr_detach(struct lagg_softc *sc)
+{
+	return (0);
+}
+
+static int
+lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
+{
+	struct lagg_port *lp;
+	uint32_t p;
+
+	p = atomic_fetchadd_32(&sc->sc_seq, 1);
+	p %= sc->sc_count;
+	lp = SLIST_FIRST(&sc->sc_ports);
+	while (p--)
+		lp = SLIST_NEXT(lp, lp_entries);
+
+	/*
+	 * Check the port's link state. This will return the next active
+	 * port if the link is down or the port is NULL.
+	 */
+	if ((lp = lagg_link_active(sc, lp)) == NULL) {
+		m_freem(m);
+		return (ENOENT);
+	}
+
+	/* Send mbuf */
+	return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+	struct ifnet *ifp = sc->sc_ifp;
+
+	/* Just pass in the packet to our lagg device */
+	m->m_pkthdr.rcvif = ifp;
+
+	return (m);
+}
+
+/*
+ * Active failover
+ */
+
+static int
+lagg_fail_attach(struct lagg_softc *sc)
+{
+	sc->sc_detach = lagg_fail_detach;
+	sc->sc_start = lagg_fail_start;
+	sc->sc_input = lagg_fail_input;
+	sc->sc_port_create = NULL;
+	sc->sc_port_destroy = NULL;
+
+	return (0);
+}
+
+static int
+lagg_fail_detach(struct lagg_softc *sc)
+{
+	return (0);
+}
+
+static int
+lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
+{
+	struct lagg_port *lp;
+
+	/* Use the master port if active or the next available port */
+	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
+		m_freem(m);
+		return (ENOENT);
+	}
+
+	/* Send mbuf */
+	return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+	struct ifnet *ifp = sc->sc_ifp;
+	struct lagg_port *tmp_tp;
+
+	if (lp == sc->sc_primary) {
+		m->m_pkthdr.rcvif = ifp;
+		return (m);
+	}
+
+	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
+		tmp_tp = lagg_link_active(sc, sc->sc_primary);
+		/*
+		 * If tmp_tp is null, we've received a packet when all
+		 * our links are down. Weird, but process it anyway.
+		 */
+		if ((tmp_tp == NULL || tmp_tp == lp)) {
+			m->m_pkthdr.rcvif = ifp;
+			return (m);
+		}
+	}
+
+	m_freem(m);
+	return (NULL);
+}
+
+/*
+ * Loadbalancing
+ */
+
+static int
+lagg_lb_attach(struct lagg_softc *sc)
+{
+	struct lagg_port *lp;
+	struct lagg_lb *lb;
+
+	if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
+	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+		return (ENOMEM);
+
+	sc->sc_detach = lagg_lb_detach;
+	sc->sc_start = lagg_lb_start;
+	sc->sc_input = lagg_lb_input;
+	sc->sc_port_create = lagg_lb_port_create;
+	sc->sc_port_destroy = lagg_lb_port_destroy;
+	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
+
+	lb->lb_key = arc4random();
+	sc->sc_psc = (caddr_t)lb;
+
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+		lagg_lb_port_create(lp);
+
+	return (0);
+}
+
+static int
+lagg_lb_detach(struct lagg_softc *sc)
+{
+	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+	if (lb != NULL)
+		free(lb, M_DEVBUF);
+	return (0);
+}
+
+static int
+lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
+{
+	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+	struct lagg_port *lp_next;
+	int i = 0;
+
+	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
+	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
+		if (lp_next == lp)
+			continue;
+		if (i >= LAGG_MAX_PORTS)
+			return (EINVAL);
+		if (sc->sc_ifflags & IFF_DEBUG)
+			printf("%s: port %s at index %d\n",
+			    sc->sc_ifname, lp_next->lp_ifname, i);
+		lb->lb_ports[i++] = lp_next;
+	}
+
+	return (0);
+}
+
+static int
+lagg_lb_port_create(struct lagg_port *lp)
+{
+	struct lagg_softc *sc = lp->lp_softc;
+	return (lagg_lb_porttable(sc, NULL));
+}
+
+static void
+lagg_lb_port_destroy(struct lagg_port *lp)
+{
+	struct lagg_softc *sc = lp->lp_softc;
+	lagg_lb_porttable(sc, lp);
+}
+
+static int
+lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
+{
+	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+	struct lagg_port *lp = NULL;
+	uint32_t p = 0;
+	int idx;
+
+	p = lagg_hashmbuf(m, lb->lb_key);
+	if ((idx = p % sc->sc_count) >= LAGG_MAX_PORTS) {
+		m_freem(m);
+		return (EINVAL);
+	}
+	lp = lb->lb_ports[idx];
+
+	/*
+	 * Check the port's link state. This will return the next active
+	 * port if the link is down or the port is NULL.
+	 */
+	if ((lp = lagg_link_active(sc, lp)) == NULL) {
+		m_freem(m);
+		return (ENOENT);
+	}
+
+	/* Send mbuf */
+	return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+	struct ifnet *ifp = sc->sc_ifp;
+
+	/* Just pass in the packet to our lagg device */
+	m->m_pkthdr.rcvif = ifp;
+
+	return (m);
+}
+
+/*
+ * 802.3ad LACP
+ */
+
+static int
+lagg_lacp_attach(struct lagg_softc *sc)
+{
+	struct lagg_port *lp;
+	int error;
+
+	sc->sc_detach = lagg_lacp_detach;
+	sc->sc_port_create = lacp_port_create;
+	sc->sc_port_destroy = lacp_port_destroy;
+	sc->sc_linkstate = lacp_linkstate;
+	sc->sc_start = lagg_lacp_start;
+	sc->sc_input = lagg_lacp_input;
+	sc->sc_init = lacp_init;
+	sc->sc_stop = lacp_stop;
+	sc->sc_lladdr = lagg_lacp_lladdr;
+	sc->sc_req = lacp_req;
+	sc->sc_portreq = lacp_portreq;
+
+	error = lacp_attach(sc);
+	if (error)
+		return (error);
+
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+		lacp_port_create(lp);
+
+	return (error);
+}
+
+static int
+lagg_lacp_detach(struct lagg_softc *sc)
+{
+	struct lagg_port *lp;
+	int error;
+
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+		lacp_port_destroy(lp);
+
+	/* unlocking is safe here */
+	LAGG_WUNLOCK(sc);
+	error = lacp_detach(sc);
+	LAGG_WLOCK(sc);
+
+	return (error);
+}
+
+static void
+lagg_lacp_lladdr(struct lagg_softc *sc)
+{
+	struct lagg_port *lp;
+
+	/* purge all the lacp ports */
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+		lacp_port_destroy(lp);
+
+	/* add them back in */
+	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+		lacp_port_create(lp);
+}
+
+static int
+lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
+{
+	struct lagg_port *lp;
+
+	lp = lacp_select_tx_port(sc, m);
+	if (lp == NULL) {
+		m_freem(m);
+		return (EBUSY);
+	}
+
+	/* Send mbuf */
+	return (lagg_enqueue(lp->lp_ifp, m));
+}
+
+static struct mbuf *
+lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+	struct ifnet *ifp = sc->sc_ifp;
+	struct ether_header *eh;
+	u_short etype;
+
+	eh = mtod(m, struct ether_header *);
+	etype = ntohs(eh->ether_type);
+
+	/* Tap off LACP control messages */
+	if (etype == ETHERTYPE_SLOW) {
+		lacp_input(lp, m);
+		return (NULL);
+	}
+
+	/*
+	 * If the port is not collecting or not in the active aggregator then
+	 * free and return.
+	 */
+	if ((lp->lp_flags & LAGG_PORT_COLLECTING) == 0 ||
+	    lacp_port_isactive(lp) == 0) {
+		m_freem(m);
+		return (NULL);
+	}
+
+	m->m_pkthdr.rcvif = ifp;
+	return (m);
+}
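
lagg_hashmbuf() above folds successive header fields into one 32-bit value by chaining hash32_buf() calls, so all packets of a flow map to the same member port while the per-lagg random key varies the distribution between systems. A minimal standalone sketch of that chaining (the helper name is illustrative, not part of the commit):

#include <sys/param.h>
#include <sys/hash.h>
#include <net/ethernet.h>

static uint32_t
flow_hash(const uint8_t *shost, const uint8_t *dhost, uint32_t key)
{
	uint32_t p;

	p = hash32_buf(shost, ETHER_ADDR_LEN, key);	/* seed with the lagg key */
	p = hash32_buf(dhost, ETHER_ADDR_LEN, p);	/* fold in the next field */
	return (p);	/* a caller would use p % port count as the index */
}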
Index: bpfdesc.h
===================================================================
RCS file: /home/cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/bpfdesc.h -L sys/net/bpfdesc.h -u -r1.1.1.1 -r1.2
--- sys/net/bpfdesc.h
+++ sys/net/bpfdesc.h
@@ -33,7 +33,7 @@
  *
  *      @(#)bpfdesc.h	8.1 (Berkeley) 6/10/93
  *
- * $FreeBSD: src/sys/net/bpfdesc.h,v 1.29.2.2 2005/09/29 23:48:04 csjp Exp $
+ * $FreeBSD: src/sys/net/bpfdesc.h,v 1.38 2007/08/06 14:26:00 rwatson Exp $
  */
 
 #ifndef _NET_BPFDESC_H_
@@ -51,12 +51,12 @@
 struct bpf_d {
 	LIST_ENTRY(bpf_d) bd_next;	/* Linked list of descriptors */
 	/*
-	 * Buffer slots: two mbuf clusters buffer the incoming packets.
+	 * Buffer slots: two malloc buffers store the incoming packets.
 	 *   The model has three slots.  Sbuf is always occupied.
 	 *   sbuf (store) - Receive interrupt puts packets here.
-	 *   hbuf (hold) - When sbuf is full, put cluster here and
+	 *   hbuf (hold) - When sbuf is full, put buffer here and
 	 *                 wakeup read (replace sbuf with fbuf).
-	 *   fbuf (free) - When read is done, put cluster here.
+	 *   fbuf (free) - When read is done, put buffer here.
 	 * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
 	 */
 	caddr_t		bd_sbuf;	/* store slot */
@@ -71,6 +71,9 @@
 	u_long		bd_rtout;	/* Read timeout in 'ticks' */
 	struct bpf_insn *bd_rfilter; 	/* read filter code */
 	struct bpf_insn *bd_wfilter;	/* write filter code */
+#ifdef BPF_JITTER
+	bpf_jit_filter	*bd_bfilter;	/* binary filter code */
+#endif
 	u_long		bd_rcount;	/* number of packets received */
 	u_long		bd_dcount;	/* number of packets dropped */
 
@@ -78,24 +81,17 @@
 	u_char		bd_state;	/* idle, waiting, or timed out */
 	u_char		bd_immediate;	/* true to return on packet arrival */
 	int		bd_hdrcmplt;	/* false to fill in src lladdr automatically */
-	int		bd_seesent;	/* true if bpf should see sent packets */
+	int		bd_direction;	/* select packet direction */
+	int		bd_feedback;	/* true to feed back sent packets */
 	int		bd_async;	/* non-zero if packet reception should generate signal */
 	int		bd_sig;		/* signal to send upon packet reception */
 	struct sigio *	bd_sigio;	/* information for async I/O */
-#if BSD < 199103
-	u_char		bd_selcoll;	/* true if selects collide */
-	int		bd_timedout;
-	struct thread *	bd_selthread;	/* process that last selected us */
-#else
-	u_char		bd_pad;		/* explicit alignment */
 	struct selinfo	bd_sel;		/* bsd select info */
-#endif
 	struct mtx	bd_mtx;		/* mutex for this descriptor */
 	struct callout	bd_callout;	/* for BPF timeouts with select */
 	struct label	*bd_label;	/* MAC label for descriptor */
 	u_long		bd_fcount;	/* number of packets which matched filter */
 	pid_t		bd_pid;		/* PID which created descriptor */
-	char		bd_pcomm[MAXCOMLEN + 1];
 	int		bd_locked;	/* true if descriptor is locked */
 };
 
@@ -106,10 +102,7 @@
 
 #define BPFD_LOCK(bd)		mtx_lock(&(bd)->bd_mtx)
 #define BPFD_UNLOCK(bd)		mtx_unlock(&(bd)->bd_mtx)
-#define BPFD_LOCK_ASSERT(bd)	do {				\
-	mtx_assert(&(bd)->bd_mtx, MA_OWNED);			\
-	NET_ASSERT_GIANT();					\
-} while (0)
+#define BPFD_LOCK_ASSERT(bd)	mtx_assert(&(bd)->bd_mtx, MA_OWNED);
 
 /* Test whether a BPF is ready for read(). */
 #define	bpf_ready(bd)						 \
@@ -118,26 +111,14 @@
 	  (bd)->bd_slen != 0))
 
 /*
- * Descriptor associated with each attached hardware interface.
- */
-struct bpf_if {
-	LIST_ENTRY(bpf_if)	bif_next;	/* list of all interfaces */
-	LIST_HEAD(, bpf_d)	bif_dlist;	/* descriptor list */
-	struct bpf_if **bif_driverp;	/* pointer into softc */
-	u_int bif_dlt;			/* link layer type */
-	u_int bif_hdrlen;		/* length of header (with padding) */
-	struct ifnet *bif_ifp;		/* corresponding interface */
-	struct mtx	bif_mtx;	/* mutex for interface */
-};
-
-/*
  * External representation of the bpf descriptor
  */
 struct xbpf_d {
 	u_char		bd_promisc;
 	u_char		bd_immediate;
 	int		bd_hdrcmplt;
-	int		bd_seesent;
+	int		bd_direction;
+	int		bd_feedback;
 	int		bd_async;
 	u_long		bd_rcount;
 	u_long		bd_dcount;
@@ -148,7 +129,6 @@
 	int		bd_bufsize;
 	pid_t		bd_pid;
 	char		bd_ifname[IFNAMSIZ];
-	char		bd_pcomm[MAXCOMLEN + 1];
 	int		bd_locked;
 };
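
In struct bpf_d above, the single bd_seesent flag is replaced by bd_direction and bd_feedback, and the same pair is exported through struct xbpf_d, so userland can select inbound-only, outbound-only, or both directions instead of merely toggling sent packets. A small userland sketch follows; it assumes the BIOCSDIRECTION ioctl and the BPF_D_* constants that the matching net/bpf.h revision in this commit is expected to provide (they are not visible in this hunk).

/*
 * Sketch: restrict an open bpf descriptor to inbound traffic, the
 * equivalent of the old bd_seesent == 0 behavior.  BIOCSDIRECTION
 * and BPF_D_IN are assumed to come from the updated <net/bpf.h>.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <err.h>

static void
bpf_inbound_only(int fd)
{
	u_int dir = BPF_D_IN;	/* BPF_D_OUT and BPF_D_INOUT also defined */

	if (ioctl(fd, BIOCSDIRECTION, &dir) == -1)
		err(1, "BIOCSDIRECTION");
}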
 
Index: if_clone.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_clone.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_clone.c -L sys/net/if_clone.c -u -r1.1.1.1 -r1.2
--- sys/net/if_clone.c
+++ sys/net/if_clone.c
@@ -27,11 +27,12 @@
  * SUCH DAMAGE.
  *
  *	@(#)if.c	8.5 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/net/if_clone.c,v 1.6 2005/02/24 13:14:41 maxim Exp $
+ * $FreeBSD: src/sys/net/if_clone.c,v 1.11 2006/07/09 06:04:00 sam Exp $
  */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
@@ -49,7 +50,10 @@
 #include <net/radix.h>
 #include <net/route.h>
 
-static void		if_clone_free(struct if_clone *ifc);
+static void	if_clone_free(struct if_clone *ifc);
+static int	if_clone_createif(struct if_clone *ifc, char *name, size_t len,
+		    caddr_t params);
+static int	if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp);
 
 static struct mtx	if_cloners_mtx;
 static int		if_cloners_count;
@@ -100,6 +104,11 @@
 		}							\
 	} while (0)
 
+#define IFC_IFLIST_INSERT(_ifc, _ifp)					\
+	LIST_INSERT_HEAD(&_ifc->ifc_iflist, _ifp, if_clones)
+#define IFC_IFLIST_REMOVE(_ifc, _ifp)					\
+	LIST_REMOVE(_ifp, if_clones)
+
 static MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
 
 void
@@ -109,22 +118,17 @@
 }
 
 /*
- * Create a clone network interface.
+ * Lookup and create a clone network interface.
  */
 int
-if_clone_create(char *name, size_t len)
+if_clone_create(char *name, size_t len, caddr_t params)
 {
-	int err;
 	struct if_clone *ifc;
 
-	if (ifunit(name) != NULL)
-		return (EEXIST);
-
 	/* Try to find an applicable cloner for this request */
 	IF_CLONERS_LOCK();
 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
 		if (ifc->ifc_match(ifc, name)) {
-			IF_CLONE_ADDREF(ifc);
 			break;
 		}
 	}
@@ -133,18 +137,44 @@
 	if (ifc == NULL)
 		return (EINVAL);
 
-	err = (*ifc->ifc_create)(ifc, name, len);
-	IF_CLONE_REMREF(ifc);
+	return (if_clone_createif(ifc, name, len, params));
+}
+
+/*
+ * Create a clone network interface.
+ */
+static int
+if_clone_createif(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+	int err;
+	struct ifnet *ifp;
+
+	if (ifunit(name) != NULL)
+		return (EEXIST);
+
+	err = (*ifc->ifc_create)(ifc, name, len, params);
+	
+	if (!err) {
+		ifp = ifunit(name);
+		if (ifp == NULL)
+			panic("%s: lookup failed for %s", __func__, name);
+
+		if_addgroup(ifp, ifc->ifc_name);
+
+		IF_CLONE_LOCK(ifc);
+		IFC_IFLIST_INSERT(ifc, ifp);
+		IF_CLONE_UNLOCK(ifc);
+	}
+
 	return (err);
 }
 
 /*
- * Destroy a clone network interface.
+ * Lookup and destroy a clone network interface.
  */
 int
 if_clone_destroy(const char *name)
 {
-	int err;
 	struct if_clone *ifc;
 	struct ifnet *ifp;
 
@@ -156,7 +186,6 @@
 	IF_CLONERS_LOCK();
 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
 		if (strcmp(ifc->ifc_name, ifp->if_dname) == 0) {
-			IF_CLONE_ADDREF(ifc);
 			break;
 		}
 	}
@@ -164,15 +193,39 @@
 	if (ifc == NULL)
 		return (EINVAL);
 
+	return (if_clone_destroyif(ifc, ifp));
+}
+
+/*
+ * Destroy a clone network interface.
+ */
+static int
+if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
+{
+	int err;
+
 	if (ifc->ifc_destroy == NULL) {
 		err = EOPNOTSUPP;
 		goto done;
 	}
 
+	IF_CLONE_LOCK(ifc);
+	IFC_IFLIST_REMOVE(ifc, ifp);
+	IF_CLONE_UNLOCK(ifc);
+
+	if_delgroup(ifp, ifc->ifc_name);
+
 	err =  (*ifc->ifc_destroy)(ifc, ifp);
 
+	if (err != 0) {
+		if_addgroup(ifp, ifc->ifc_name);
+
+		IF_CLONE_LOCK(ifc);
+		IFC_IFLIST_INSERT(ifc, ifp);
+		IF_CLONE_UNLOCK(ifc);
+	}
+
 done:
-	IF_CLONE_REMREF(ifc);
 	return (err);
 }
 
@@ -201,6 +254,8 @@
 	if_cloners_count++;
 	IF_CLONERS_UNLOCK();
 
+	LIST_INIT(&ifc->ifc_iflist);
+
 	if (ifc->ifc_attach != NULL)
 		(*ifc->ifc_attach)(ifc);
 	EVENTHANDLER_INVOKE(if_clone_event, ifc);
@@ -212,18 +267,34 @@
 void
 if_clone_detach(struct if_clone *ifc)
 {
+	struct ifc_simple_data *ifcs = ifc->ifc_data;
 
 	IF_CLONERS_LOCK();
 	LIST_REMOVE(ifc, ifc_list);
 	if_cloners_count--;
 	IF_CLONERS_UNLOCK();
 
+	/* Allow all simples to be destroyed */
+	if (ifc->ifc_attach == ifc_simple_attach)
+		ifcs->ifcs_minifs = 0;
+
+	/* destroy all interfaces for this cloner */
+	while (!LIST_EMPTY(&ifc->ifc_iflist))
+		if_clone_destroyif(ifc, LIST_FIRST(&ifc->ifc_iflist));
+	
 	IF_CLONE_REMREF(ifc);
 }
 
 static void
 if_clone_free(struct if_clone *ifc)
 {
+	for (int bytoff = 0; bytoff < ifc->ifc_bmlen; bytoff++) {
+		KASSERT(ifc->ifc_units[bytoff] == 0x00,
+		    ("ifc_units[%d] is not empty", bytoff));
+	}
+
+	KASSERT(LIST_EMPTY(&ifc->ifc_iflist),
+	    ("%s: ifc_iflist not empty", __func__));
 
 	IF_CLONE_LOCK_DESTROY(ifc);
 	free(ifc->ifc_units, M_CLONE);
@@ -292,16 +363,24 @@
 ifc_name2unit(const char *name, int *unit)
 {
 	const char	*cp;
+	int		cutoff = INT_MAX / 10;
+	int		cutlim = INT_MAX % 10;
 
 	for (cp = name; *cp != '\0' && (*cp < '0' || *cp > '9'); cp++);
 	if (*cp == '\0') {
 		*unit = -1;
+	} else if (cp[0] == '0' && cp[1] != '\0') {
+		/* Disallow leading zeroes. */
+		return (EINVAL);
 	} else {
 		for (*unit = 0; *cp != '\0'; cp++) {
 			if (*cp < '0' || *cp > '9') {
 				/* Bogus unit number. */
 				return (EINVAL);
 			}
+			if (*unit > cutoff ||
+			    (*unit == cutoff && *cp - '0' > cutlim))
+				return (EINVAL);
 			*unit = (*unit * 10) + (*cp - '0');
 		}
 	}
@@ -352,7 +431,10 @@
 	/*
 	 * Allocate the unit in the bitmap.
 	 */
+	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
+	    ("%s: bit is already set", __func__));
 	ifc->ifc_units[bytoff] |= (1 << bitoff);
+	IF_CLONE_ADDREF_LOCKED(ifc);
 
 done:
 	IF_CLONE_UNLOCK(ifc);
@@ -375,7 +457,7 @@
 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
 	    ("%s: bit is already cleared", __func__));
 	ifc->ifc_units[bytoff] &= ~(1 << bitoff);
-	IF_CLONE_UNLOCK(ifc);
+	IF_CLONE_REMREF_LOCKED(ifc);	/* releases lock */
 }
 
 void
@@ -387,13 +469,13 @@
 	struct ifc_simple_data *ifcs = ifc->ifc_data;
 
 	KASSERT(ifcs->ifcs_minifs - 1 <= ifc->ifc_maxunit,
-	    ("%s: %s requested more units then allowed (%d > %d)",
+	    ("%s: %s requested more units than allowed (%d > %d)",
 	    __func__, ifc->ifc_name, ifcs->ifcs_minifs,
 	    ifc->ifc_maxunit + 1));
 
 	for (unit = 0; unit < ifcs->ifcs_minifs; unit++) {
 		snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
-		err = (*ifc->ifc_create)(ifc, name, IFNAMSIZ);
+		err = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
 		KASSERT(err == 0,
 		    ("%s: failed to create required interface %s",
 		    __func__, name));
@@ -422,7 +504,7 @@
 }
 
 int
-ifc_simple_create(struct if_clone *ifc, char *name, size_t len)
+ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	char *dp;
 	int wildcard;
@@ -440,7 +522,7 @@
 	if (err != 0)
 		return (err);
 
-	err = ifcs->ifcs_create(ifc, unit);
+	err = ifcs->ifcs_create(ifc, unit, params);
 	if (err != 0) {
 		ifc_free_unit(ifc, unit);
 		return (err);
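
The cutoff/cutlim pair added to ifc_name2unit() above rejects unit numbers that would overflow an int, using the same guard strtol(3) uses, and the new leading-zero check keeps a name such as "vlan007" from parsing to the same unit as "vlan7". A standalone sketch of the overflow guard, with an illustrative function name not taken from this commit:

/*
 * Overflow-safe base-10 parse of a unit number, mirroring the check
 * added to ifc_name2unit().  Returns -1 on a non-digit or overflow.
 */
#include <limits.h>

static int
parse_unit(const char *cp, int *unit)
{
	int cutoff = INT_MAX / 10;	/* 214748364 for 32-bit int */
	int cutlim = INT_MAX % 10;	/* 7 */

	for (*unit = 0; *cp != '\0'; cp++) {
		if (*cp < '0' || *cp > '9')
			return (-1);
		if (*unit > cutoff ||
		    (*unit == cutoff && *cp - '0' > cutlim))
			return (-1);	/* next digit would exceed INT_MAX */
		*unit = *unit * 10 + (*cp - '0');
	}
	return (0);
}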
Index: if_vlan.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_vlan.c -L sys/net/if_vlan.c -u -r1.1.1.2 -r1.2
--- sys/net/if_vlan.c
+++ sys/net/if_vlan.c
@@ -26,7 +26,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_vlan.c,v 1.79.2.10 2006/02/18 22:48:16 yar Exp $
+ * $FreeBSD: src/sys/net/if_vlan.c,v 1.124.2.1 2007/10/28 16:24:16 thompsa Exp $
  */
 
 /*
@@ -41,13 +41,15 @@
  * and ask it to send them.
  */
 
-#include "opt_inet.h"
+#include "opt_vlan.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
+#include <sys/rwlock.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
@@ -58,45 +60,63 @@
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_clone.h>
-#include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
-#ifdef INET
-#include <netinet/in.h>
-#include <netinet/if_ether.h>
-#endif
-
 #define VLANNAME	"vlan"
+#define	VLAN_DEF_HWIDTH	4
 #define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)
 
+#define	UP_AND_RUNNING(ifp) \
+    ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)
+
+LIST_HEAD(ifvlanhead, ifvlan);
+
+struct ifvlantrunk {
+	struct	ifnet   *parent;	/* parent interface of this trunk */
+	struct	rwlock	rw;
+#ifdef VLAN_ARRAY
+#define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
+	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
+#else
+	struct	ifvlanhead *hash;	/* dynamic hash-list table */
+	uint16_t	hmask;
+	uint16_t	hwidth;
+#endif
+	int		refcnt;
+};
+
 struct vlan_mc_entry {
 	struct ether_addr		mc_addr;
 	SLIST_ENTRY(vlan_mc_entry)	mc_entries;
 };
 
 struct	ifvlan {
+	struct	ifvlantrunk *ifv_trunk;
 	struct	ifnet *ifv_ifp;
-	struct	ifnet *ifv_p;	/* parent inteface of this vlan */
+#define	TRUNK(ifv)	((ifv)->ifv_trunk)
+#define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
 	int	ifv_pflags;	/* special flags we have set on parent */
 	struct	ifv_linkmib {
-		int	ifvm_parent;
 		int	ifvm_encaplen;	/* encapsulation length */
 		int	ifvm_mtufudge;	/* MTU fudged by this much */
 		int	ifvm_mintu;	/* min transmission unit */
-		u_int16_t ifvm_proto; /* encapsulation ethertype */
-		u_int16_t ifvm_tag; /* tag to apply on packets leaving if */
+		uint16_t ifvm_proto;	/* encapsulation ethertype */
+		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
 	}	ifv_mib;
-	SLIST_HEAD(__vlan_mchead, vlan_mc_entry)	vlan_mc_listhead;
+	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
+#ifndef VLAN_ARRAY
 	LIST_ENTRY(ifvlan) ifv_list;
+#endif
 };
-#define	ifv_tag	ifv_mib.ifvm_tag
+#define	ifv_proto	ifv_mib.ifvm_proto
+#define	ifv_tag		ifv_mib.ifvm_tag
 #define	ifv_encaplen	ifv_mib.ifvm_encaplen
 #define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
 #define	ifv_mintu	ifv_mib.ifvm_mintu
 
-/* Special flags we should propagate to parent */
+/* Special flags we should propagate to parent. */
 static struct {
 	int flag;
 	int (*func)(struct ifnet *, int);
@@ -110,23 +130,54 @@
 SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN");
 SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency");
 
+static int soft_pad = 0;
+SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
+	   "pad short frames before tagging");
+
 static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");
-static LIST_HEAD(, ifvlan) ifv_list;
+
+static eventhandler_tag ifdetach_tag;
 
 /*
- * Locking: one lock is used to guard both the ifv_list and modification
- * to vlan data structures.  We are rather conservative here; probably
- * more than necessary.
+ * We have a global mutex, that is used to serialize configuration
+ * changes and isn't used in normal packet delivery.
+ *
+ * We also have a per-trunk rwlock, that is locked shared on packet
+ * processing and exclusive when configuration is changed.
+ *
+ * The VLAN_ARRAY substitutes the dynamic hash with a static array
+ * with 4096 entries. In theory this can give a boost in processing,
+ * however in practice it does not, probably because the array is too
+ * big to fit into the CPU cache.
  */
 static struct mtx ifv_mtx;
-#define	VLAN_LOCK_INIT()	mtx_init(&ifv_mtx, VLANNAME, NULL, MTX_DEF)
+#define	VLAN_LOCK_INIT()	mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF)
 #define	VLAN_LOCK_DESTROY()	mtx_destroy(&ifv_mtx)
 #define	VLAN_LOCK_ASSERT()	mtx_assert(&ifv_mtx, MA_OWNED)
-#define	VLAN_LOCK()	mtx_lock(&ifv_mtx)
-#define	VLAN_UNLOCK()	mtx_unlock(&ifv_mtx)
+#define	VLAN_LOCK()		mtx_lock(&ifv_mtx)
+#define	VLAN_UNLOCK()		mtx_unlock(&ifv_mtx)
+#define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, VLANNAME)
+#define	TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
+#define	TRUNK_LOCK(trunk)	rw_wlock(&(trunk)->rw)
+#define	TRUNK_UNLOCK(trunk)	rw_wunlock(&(trunk)->rw)
+#define	TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
+#define	TRUNK_RLOCK(trunk)	rw_rlock(&(trunk)->rw)
+#define	TRUNK_RUNLOCK(trunk)	rw_runlock(&(trunk)->rw)
+#define	TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
+
+#ifndef VLAN_ARRAY
+static	void vlan_inithash(struct ifvlantrunk *trunk);
+static	void vlan_freehash(struct ifvlantrunk *trunk);
+static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
+static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
+static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
+static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
+	uint16_t tag);
+#endif
+static	void trunk_destroy(struct ifvlantrunk *trunk);
 
 static	void vlan_start(struct ifnet *ifp);
-static	void vlan_ifinit(void *foo);
+static	void vlan_init(void *foo);
 static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
 static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
 static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
@@ -134,18 +185,215 @@
 static	int vlan_setflags(struct ifnet *ifp, int status);
 static	int vlan_setmulti(struct ifnet *ifp);
 static	int vlan_unconfig(struct ifnet *ifp);
-static	int vlan_config(struct ifvlan *ifv, struct ifnet *p);
+static	int vlan_unconfig_locked(struct ifnet *ifp);
+static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
 static	void vlan_link_state(struct ifnet *ifp, int link);
+static	void vlan_capabilities(struct ifvlan *ifv);
+static	void vlan_trunk_capabilities(struct ifnet *ifp);
 
 static	struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
     const char *, int *);
 static	int vlan_clone_match(struct if_clone *, const char *);
-static	int vlan_clone_create(struct if_clone *, char *, size_t);
+static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);
 
+static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
+
 static	struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
     IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
 
+#ifndef VLAN_ARRAY
+#define HASH(n, m)	((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m))
+
+static void
+vlan_inithash(struct ifvlantrunk *trunk)
+{
+	int i, n;
+	
+	/*
+	 * The trunk must not be locked here since we call malloc(M_WAITOK).
+	 * This is OK because this function is called before the trunk struct
+	 * gets hooked up and becomes visible to other threads.
+	 */
+
+	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
+	    ("%s: hash already initialized", __func__));
+
+	trunk->hwidth = VLAN_DEF_HWIDTH;
+	n = 1 << trunk->hwidth;
+	trunk->hmask = n - 1;
+	trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
+	for (i = 0; i < n; i++)
+		LIST_INIT(&trunk->hash[i]);
+}
+
+static void
+vlan_freehash(struct ifvlantrunk *trunk)
+{
+#ifdef INVARIANTS
+	int i;
+
+	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+	for (i = 0; i < (1 << trunk->hwidth); i++)
+		KASSERT(LIST_EMPTY(&trunk->hash[i]),
+		    ("%s: hash table not empty", __func__));
+#endif
+	free(trunk->hash, M_VLAN);
+	trunk->hash = NULL;
+	trunk->hwidth = trunk->hmask = 0;
+}
+
+static int
+vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
+{
+	int i, b;
+	struct ifvlan *ifv2;
+
+	TRUNK_LOCK_ASSERT(trunk);
+	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+
+	b = 1 << trunk->hwidth;
+	i = HASH(ifv->ifv_tag, trunk->hmask);
+	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
+		if (ifv->ifv_tag == ifv2->ifv_tag)
+			return (EEXIST);
+
+	/*
+	 * Grow the hash when the number of vlans exceeds half of the number of
+	 * hash buckets squared. This will make the average linked-list length
+	 * buckets/2.
+	 */
+	if (trunk->refcnt > (b * b) / 2) {
+		vlan_growhash(trunk, 1);
+		i = HASH(ifv->ifv_tag, trunk->hmask);
+	}
+	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
+	trunk->refcnt++;
+
+	return (0);
+}
+
+static int
+vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
+{
+	int i, b;
+	struct ifvlan *ifv2;
+
+	TRUNK_LOCK_ASSERT(trunk);
+	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+	
+	b = 1 << trunk->hwidth;
+	i = HASH(ifv->ifv_tag, trunk->hmask);
+	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
+		if (ifv2 == ifv) {
+			trunk->refcnt--;
+			LIST_REMOVE(ifv2, ifv_list);
+			if (trunk->refcnt < (b * b) / 2)
+				vlan_growhash(trunk, -1);
+			return (0);
+		}
+
+	panic("%s: vlan not found\n", __func__);
+	return (ENOENT); /*NOTREACHED*/
+}
+
+/*
+ * Grow the hash larger or smaller if memory permits.
+ */
+static void
+vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
+{
+	struct ifvlan *ifv;
+	struct ifvlanhead *hash2;
+	int hwidth2, i, j, n, n2;
+
+	TRUNK_LOCK_ASSERT(trunk);
+	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
+
+	if (howmuch == 0) {
+		/* Harmless yet obvious coding error */
+		printf("%s: howmuch is 0\n", __func__);
+		return;
+	}
+
+	hwidth2 = trunk->hwidth + howmuch;
+	n = 1 << trunk->hwidth;
+	n2 = 1 << hwidth2;
+	/* Do not shrink the table below the default */
+	if (hwidth2 < VLAN_DEF_HWIDTH)
+		return;
+
+	/* M_NOWAIT because we're called with trunk mutex held */
+	hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
+	if (hash2 == NULL) {
+		printf("%s: out of memory -- hash size not changed\n",
+		    __func__);
+		return;		/* We can live with the old hash table */
+	}
+	for (j = 0; j < n2; j++)
+		LIST_INIT(&hash2[j]);
+	for (i = 0; i < n; i++)
+		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
+			LIST_REMOVE(ifv, ifv_list);
+			j = HASH(ifv->ifv_tag, n2 - 1);
+			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
+		}
+	free(trunk->hash, M_VLAN);
+	trunk->hash = hash2;
+	trunk->hwidth = hwidth2;
+	trunk->hmask = n2 - 1;
+
+	if (bootverbose)
+		if_printf(trunk->parent,
+		    "VLAN hash table resized from %d to %d buckets\n", n, n2);
+}
+
+static __inline struct ifvlan *
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+{
+	struct ifvlan *ifv;
+
+	TRUNK_LOCK_RASSERT(trunk);
+
+	LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list)
+		if (ifv->ifv_tag == tag)
+			return (ifv);
+	return (NULL);
+}
+
+#if 0
+/* Debugging code to view the hashtables. */
+static void
+vlan_dumphash(struct ifvlantrunk *trunk)
+{
+	int i;
+	struct ifvlan *ifv;
+
+	for (i = 0; i < (1 << trunk->hwidth); i++) {
+		printf("%d: ", i);
+		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
+			printf("%s ", ifv->ifv_ifp->if_xname);
+		printf("\n");
+	}
+}
+#endif /* 0 */
+#endif /* !VLAN_ARRAY */
+
+static void
+trunk_destroy(struct ifvlantrunk *trunk)
+{
+	VLAN_LOCK_ASSERT();
+
+	TRUNK_LOCK(trunk);
+#ifndef VLAN_ARRAY
+	vlan_freehash(trunk);
+#endif
+	trunk->parent->if_vlantrunk = NULL;
+	TRUNK_UNLOCK(trunk);
+	TRUNK_LOCK_DESTROY(trunk);
+	free(trunk, M_VLAN);
+}
+
 /*
  * Program our multicast filter. What we're actually doing is
  * programming the multicast filter of the parent. This has the
@@ -163,7 +411,7 @@
 	struct ifnet		*ifp_p;
 	struct ifmultiaddr	*ifma, *rifma = NULL;
 	struct ifvlan		*sc;
-	struct vlan_mc_entry	*mc = NULL;
+	struct vlan_mc_entry	*mc;
 	struct sockaddr_dl	sdl;
 	int			error;
 
@@ -171,14 +419,7 @@
 
 	/* Find the parent. */
 	sc = ifp->if_softc;
-	ifp_p = sc->ifv_p;
-
-	/*
-	 * If we don't have a parent, just remember the membership for
-	 * when we do.
-	 */
-	if (ifp_p == NULL)
-		return (0);
+	ifp_p = PARENT(sc);
 
 	bzero((char *)&sdl, sizeof(sdl));
 	sdl.sdl_len = sizeof(sdl);
@@ -188,8 +429,7 @@
 	sdl.sdl_alen = ETHER_ADDR_LEN;
 
 	/* First, remove any existing filter entries. */
-	while (SLIST_FIRST(&sc->vlan_mc_listhead) != NULL) {
-		mc = SLIST_FIRST(&sc->vlan_mc_listhead);
+	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
 		bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
 		error = if_delmulti(ifp_p, (struct sockaddr *)&sdl);
 		if (error)
@@ -219,17 +459,59 @@
 }
 
 /*
+ * A handler for network interface departure events.
+ * Track departure of trunks here so that we don't access invalid
+ * pointers or whatever if a trunk is ripped from under us, e.g.,
+ * by ejecting its hot-plug card.
+ */
+static void
+vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct ifvlan *ifv;
+	int i;
+
+	/*
+	 * Check if it's a trunk interface first of all
+	 * to avoid needless locking.
+	 */
+	if (ifp->if_vlantrunk == NULL)
+		return;
+
+	VLAN_LOCK();
+	/*
+	 * OK, it's a trunk.  Loop over and detach all vlans on it.
+	 * Check trunk pointer after each vlan_unconfig() as it will
+	 * free it and set to NULL after the last vlan was detached.
+	 */
+#ifdef VLAN_ARRAY
+	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
+		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
+			vlan_unconfig_locked(ifv->ifv_ifp);
+			if (ifp->if_vlantrunk == NULL)
+				break;
+		}
+#else /* VLAN_ARRAY */
+restart:
+	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
+		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
+			vlan_unconfig_locked(ifv->ifv_ifp);
+			if (ifp->if_vlantrunk)
+				goto restart;	/* trunk->hwidth can change */
+			else
+				break;
+		}
+#endif /* VLAN_ARRAY */
+	/* Trunk should have been destroyed in vlan_unconfig(). */
+	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
+	VLAN_UNLOCK();
+}
+
+/*
  * VLAN support can be loaded as a module.  The only place in the
  * system that's intimately aware of this is ether_input.  We hook
  * into this code through vlan_input_p which is defined there and
  * set here.  Noone else in the system should be aware of this so
  * we use an explicit reference here.
- *
- * NB: Noone should ever need to check if vlan_input_p is null or
- *     not.  This is because interfaces have a count of the number
- *     of active vlans (if_nvlans) and this should never be bumped
- *     except by vlan_config--which is in this module so therefore
- *     the module must be loaded and vlan_input_p must be non-NULL.
  */
 extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);
 
@@ -242,20 +524,34 @@
 
 	switch (type) {
 	case MOD_LOAD:
-		LIST_INIT(&ifv_list);
+		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
+		if (ifdetach_tag == NULL)
+			return (ENOMEM);
 		VLAN_LOCK_INIT();
 		vlan_input_p = vlan_input;
 		vlan_link_state_p = vlan_link_state;
+		vlan_trunk_cap_p = vlan_trunk_capabilities;
 		if_clone_attach(&vlan_cloner);
+		if (bootverbose)
+			printf("vlan: initialized, using "
+#ifdef VLAN_ARRAY
+			       "full-size arrays"
+#else
+			       "hash tables with chaining"
+#endif
+			
+			       "\n");
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(&vlan_cloner);
+		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
 		vlan_input_p = NULL;
 		vlan_link_state_p = NULL;
-		while (!LIST_EMPTY(&ifv_list))
-			vlan_clone_destroy(&vlan_cloner,
-			    LIST_FIRST(&ifv_list)->ifv_ifp);
+		vlan_trunk_cap_p = NULL;
 		VLAN_LOCK_DESTROY();
+		if (bootverbose)
+			printf("vlan: unloaded\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
@@ -270,6 +566,7 @@
 };
 
 DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_vlan, 3);
 MODULE_DEPEND(if_vlan, miibus, 1, 1, 1);
 
 static struct ifnet *
@@ -322,7 +619,7 @@
 }
 
 static int
-vlan_clone_create(struct if_clone *ifc, char *name, size_t len)
+vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	char *dp;
 	int wildcard;
@@ -333,9 +630,39 @@
 	struct ifvlan *ifv;
 	struct ifnet *ifp;
 	struct ifnet *p;
-	u_char eaddr[6] = {0,0,0,0,0,0};
+	struct vlanreq vlr;
+	static const u_char eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
+
+	/*
+	 * There are 3 (ugh) ways to specify the cloned device:
+	 * o pass a parameter block with the clone request.
+	 * o specify parameters in the text of the clone device name
+	 * o specify no parameters and get an unattached device that
+	 *   must be configured separately.
+	 * The first technique is preferred; the latter two are
+	 * supported for backwards compatibility.
+	 */
+	if (params) {
+		error = copyin(params, &vlr, sizeof(vlr));
+		if (error)
+			return error;
+		p = ifunit(vlr.vlr_parent);
+		if (p == NULL)
+			return ENXIO;
+		/*
+		 * Don't let the caller set up a VLAN tag with
+		 * anything except VLID bits.
+		 */
+		if (vlr.vlr_tag & ~EVL_VLID_MASK)
+			return (EINVAL);
+		error = ifc_name2unit(name, &unit);
+		if (error != 0)
+			return (error);
 
-	if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
+		ethertag = 1;
+		tag = vlr.vlr_tag;
+		wildcard = (unit < 0);
+	} else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
 		ethertag = 1;
 		unit = -1;
 		wildcard = 0;
@@ -391,7 +718,7 @@
 	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
 	/* NB: mtu is not set here */
 
-	ifp->if_init = vlan_ifinit;
+	ifp->if_init = vlan_init;
 	ifp->if_start = vlan_start;
 	ifp->if_ioctl = vlan_ioctl;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
@@ -402,31 +729,21 @@
 	ifp->if_type = IFT_L2VLAN;
 	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
 
-	VLAN_LOCK();
-	LIST_INSERT_HEAD(&ifv_list, ifv, ifv_list);
-	VLAN_UNLOCK();
-
 	if (ethertag) {
-		VLAN_LOCK();
-		error = vlan_config(ifv, p);
+		error = vlan_config(ifv, p, tag);
 		if (error != 0) {
 			/*
 			 * Since we've partially failed, we need to back
 			 * out all the way, otherwise userland could get
 			 * confused.  Thus, we destroy the interface.
 			 */
-			LIST_REMOVE(ifv, ifv_list);
-			vlan_unconfig(ifp);
-			VLAN_UNLOCK();
 			ether_ifdetach(ifp);
+			vlan_unconfig(ifp);
 			if_free_type(ifp, IFT_ETHER);
 			free(ifv, M_VLAN);
 
 			return (error);
 		}
-		ifv->ifv_tag = tag;
-		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-		VLAN_UNLOCK();
 
 		/* Update flags on the parent, if necessary. */
 		vlan_setflags(ifp, 1);
@@ -438,21 +755,13 @@
 static int
 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
 {
-	int unit;
 	struct ifvlan *ifv = ifp->if_softc;
+	int unit = ifp->if_dunit;
 
-	unit = ifp->if_dunit;
-
-	VLAN_LOCK();
-	LIST_REMOVE(ifv, ifv_list);
-	vlan_unconfig(ifp);
-	VLAN_UNLOCK();
-
-	ether_ifdetach(ifp);
+	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
+	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
 	if_free_type(ifp, IFT_ETHER);
-
 	free(ifv, M_VLAN);
-
 	ifc_free_unit(ifc, unit);
 
 	return (0);
@@ -462,9 +771,8 @@
  * The ifp->if_init entry point for vlan(4) is a no-op.
  */
 static void
-vlan_ifinit(void *foo)
+vlan_init(void *foo __unused)
 {
-
 }
 
 /*
@@ -480,16 +788,15 @@
 {
 	struct ifvlan *ifv;
 	struct ifnet *p;
-	struct ether_vlan_header *evl;
 	struct mbuf *m;
 	int error;
 
 	ifv = ifp->if_softc;
-	p = ifv->ifv_p;
+	p = PARENT(ifv);
 
 	for (;;) {
 		IF_DEQUEUE(&ifp->if_snd, m);
-		if (m == 0)
+		if (m == NULL)
 			break;
 		BPF_MTAP(ifp, m);
 
@@ -497,14 +804,42 @@
 		 * Do not run parent's if_start() if the parent is not up,
 		 * or parent's driver will cause a system crash.
 		 */
-		if (!((p->if_flags & IFF_UP) &&
-		    (p->if_drv_flags & IFF_DRV_RUNNING))) {
+		if (!UP_AND_RUNNING(p)) {
 			m_freem(m);
 			ifp->if_collisions++;
 			continue;
 		}
 
 		/*
+		 * Pad the frame to the minimum size allowed if told to.
+		 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
+		 * paragraph C.4.4.3.b.  It can help to work around buggy
+		 * bridges that violate paragraph C.4.4.3.a from the same
+		 * document, i.e., fail to pad short frames after untagging.
+		 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
+		 * untagging it will produce a 62-byte frame, which is a runt
+		 * and requires padding.  There are VLAN-enabled network
+		 * devices that just discard such runts instead or mishandle
+		 * them somehow.
+		 */
+		if (soft_pad) {
+			static char pad[8];	/* just zeros */
+			int n;
+
+			for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
+			     n > 0; n -= sizeof(pad))
+				if (!m_append(m, min(n, sizeof(pad)), pad))
+					break;
+
+			if (n > 0) {
+				if_printf(ifp, "cannot pad short frame\n");
+				ifp->if_oerrors++;
+				m_freem(m);
+				continue;
+			}
+		}
+
+		/*
 		 * If underlying interface can do VLAN tag insertion itself,
 		 * just pass the packet along. However, we need some way to
 		 * tell the interface where the packet came from so that it
@@ -512,52 +847,16 @@
 		 * packet tag that holds it.
 		 */
 		if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
-			struct m_tag *mtag = m_tag_alloc(MTAG_VLAN,
-							 MTAG_VLAN_TAG,
-							 sizeof(u_int),
-							 M_NOWAIT);
-			if (mtag == NULL) {
-				ifp->if_oerrors++;
-				m_freem(m);
-				continue;
-			}
-			VLAN_TAG_VALUE(mtag) = ifv->ifv_tag;
-			m_tag_prepend(m, mtag);
+			m->m_pkthdr.ether_vtag = ifv->ifv_tag;
 			m->m_flags |= M_VLANTAG;
 		} else {
-			M_PREPEND(m, ifv->ifv_encaplen, M_DONTWAIT);
+			m = ether_vlanencap(m, ifv->ifv_tag);
 			if (m == NULL) {
 				if_printf(ifp,
 				    "unable to prepend VLAN header\n");
 				ifp->if_oerrors++;
 				continue;
 			}
-			/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
-
-			if (m->m_len < sizeof(*evl)) {
-				m = m_pullup(m, sizeof(*evl));
-				if (m == NULL) {
-					if_printf(ifp,
-					    "cannot pullup VLAN header\n");
-					ifp->if_oerrors++;
-					continue;
-				}
-			}
-
-			/*
-			 * Transform the Ethernet header into an Ethernet header
-			 * with 802.1Q encapsulation.
-			 */
-			bcopy(mtod(m, char *) + ifv->ifv_encaplen,
-			      mtod(m, char *), ETHER_HDR_LEN);
-			evl = mtod(m, struct ether_vlan_header *);
-			evl->evl_proto = evl->evl_encap_proto;
-			evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
-			evl->evl_tag = htons(ifv->ifv_tag);
-#ifdef DEBUG
-			printf("%s: %*D\n", __func__, (int)sizeof(*evl),
-			    (unsigned char *)evl, ":");
-#endif
 		}
 
 		/*
@@ -575,27 +874,25 @@
 static void
 vlan_input(struct ifnet *ifp, struct mbuf *m)
 {
-	struct ether_vlan_header *evl;
+	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
 	struct ifvlan *ifv;
-	struct m_tag *mtag;
-	u_int tag;
+	uint16_t tag;
+
+	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
 
 	if (m->m_flags & M_VLANTAG) {
 		/*
 		 * Packet is tagged, but m contains a normal
 		 * Ethernet frame; the tag is stored out-of-band.
 		 */
-		mtag = m_tag_locate(m, MTAG_VLAN, MTAG_VLAN_TAG, NULL);
-		KASSERT(mtag != NULL,
-			("%s: M_VLANTAG without m_tag", __func__));
-		tag = EVL_VLANOFTAG(VLAN_TAG_VALUE(mtag));
-		m_tag_delete(m, mtag);
+		tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
 		m->m_flags &= ~M_VLANTAG;
 	} else {
+		struct ether_vlan_header *evl;
+
 		/*
 		 * Packet is tagged in-band as specified by 802.1q.
 		 */
-		mtag = NULL;
 		switch (ifp->if_type) {
 		case IFT_ETHER:
 			if (m->m_len < sizeof(*evl) &&
@@ -604,59 +901,43 @@
 				return;
 			}
 			evl = mtod(m, struct ether_vlan_header *);
-			KASSERT(ntohs(evl->evl_encap_proto) == ETHERTYPE_VLAN,
-				("%s: bad encapsulation protocol (%u)",
-				 __func__, ntohs(evl->evl_encap_proto)));
-
 			tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
 
 			/*
-			 * Restore the original ethertype.  We'll remove
-			 * the encapsulation after we've found the vlan
-			 * interface corresponding to the tag.
+			 * Remove the 802.1q header by copying the Ethernet
+			 * addresses over it and adjusting the beginning of
+			 * the data in the mbuf.  The encapsulated Ethernet
+			 * type field is already in place.
 			 */
-			evl->evl_encap_proto = evl->evl_proto;
+			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
+			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
+			m_adj(m, ETHER_VLAN_ENCAP_LEN);
 			break;
+
 		default:
-			tag = (u_int) -1;
 #ifdef INVARIANTS
-			panic("%s: unsupported if_type (%u)",
-			      __func__, ifp->if_type);
+			panic("%s: %s has unsupported if_type %u",
+			      __func__, ifp->if_xname, ifp->if_type);
 #endif
-			break;
+			m_freem(m);
+			ifp->if_noproto++;
+			return;
 		}
 	}
 
-	VLAN_LOCK();
-	LIST_FOREACH(ifv, &ifv_list, ifv_list)
-		if (ifp == ifv->ifv_p && tag == ifv->ifv_tag)
-			break;
-
-	if (ifv == NULL || (ifv->ifv_ifp->if_flags & IFF_UP) == 0) {
-		VLAN_UNLOCK();
+	TRUNK_RLOCK(trunk);
+#ifdef VLAN_ARRAY
+	ifv = trunk->vlans[tag];
+#else
+	ifv = vlan_gethash(trunk, tag);
+#endif
+	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
+		TRUNK_RUNLOCK(trunk);
 		m_freem(m);
 		ifp->if_noproto++;
-#ifdef DEBUG
-		printf("%s: tag %d, no interface\n", __func__, tag);
-#endif
 		return;
 	}
-	VLAN_UNLOCK();		/* XXX extend below? */
-#ifdef DEBUG
-	printf("%s: tag %d, parent %s\n", __func__, tag, ifv->ifv_p->if_xname);
-#endif
-
-	if (mtag == NULL) {
-		/*
-		 * Packet had an in-line encapsulation header;
-		 * remove it.  The original header has already
-		 * been fixed up above.
-		 */
-		bcopy(mtod(m, caddr_t),
-		      mtod(m, caddr_t) + ETHER_VLAN_ENCAP_LEN,
-		      ETHER_HDR_LEN);
-		m_adj(m, ETHER_VLAN_ENCAP_LEN);
-	}
+	TRUNK_RUNLOCK(trunk);
 
 	m->m_pkthdr.rcvif = ifv->ifv_ifp;
 	ifv->ifv_ifp->if_ipackets++;
@@ -666,33 +947,67 @@
 }
 
 static int
-vlan_config(struct ifvlan *ifv, struct ifnet *p)
+vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
 {
-	struct ifaddr *ifa1, *ifa2;
+	struct ifvlantrunk *trunk;
 	struct ifnet *ifp;
-	struct sockaddr_dl *sdl1, *sdl2;
-
-	VLAN_LOCK_ASSERT();
+	int error = 0;
 
+	/* VID numbers 0x0 and 0xFFF are reserved */
+	if (tag == 0 || tag == 0xFFF)
+		return (EINVAL);
 	if (p->if_type != IFT_ETHER)
 		return (EPROTONOSUPPORT);
 	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
 		return (EPROTONOSUPPORT);
-	if (ifv->ifv_p)
+	if (ifv->ifv_trunk)
 		return (EBUSY);
 
+	if (p->if_vlantrunk == NULL) {
+		trunk = malloc(sizeof(struct ifvlantrunk),
+		    M_VLAN, M_WAITOK | M_ZERO);
+#ifndef VLAN_ARRAY
+		vlan_inithash(trunk);
+#endif
+		VLAN_LOCK();
+		if (p->if_vlantrunk != NULL) {
+			/* A race that is very unlikely to be hit. */
+#ifndef VLAN_ARRAY
+			vlan_freehash(trunk);
+#endif
+			free(trunk, M_VLAN);
+			goto exists;
+		}
+		TRUNK_LOCK_INIT(trunk);
+		TRUNK_LOCK(trunk);
+		p->if_vlantrunk = trunk;
+		trunk->parent = p;
+	} else {
+		VLAN_LOCK();
+exists:
+		trunk = p->if_vlantrunk;
+		TRUNK_LOCK(trunk);
+	}
+
+	ifv->ifv_tag = tag;	/* must set this before vlan_inshash() */
+#ifdef VLAN_ARRAY
+	if (trunk->vlans[tag] != NULL) {
+		error = EEXIST;
+		goto done;
+	}
+	trunk->vlans[tag] = ifv;
+	trunk->refcnt++;
+#else
+	error = vlan_inshash(trunk, ifv);
+	if (error)
+		goto done;
+#endif
+	ifv->ifv_proto = ETHERTYPE_VLAN;
 	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
 	ifv->ifv_mintu = ETHERMIN;
 	ifv->ifv_pflags = 0;
 
 	/*
-	 * The active VLAN counter on the parent is used
-	 * at various places to see if there is a vlan(4)
-	 * attached to this physical interface.
-	 */
-	p->if_nvlans++;
-
-	/*
 	 * If the parent supports the VLAN_MTU capability,
 	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
 	 * use it.
@@ -714,10 +1029,10 @@
 		ifv->ifv_mtufudge = ifv->ifv_encaplen;
 	}
 
-	ifv->ifv_p = p;
+	ifv->ifv_trunk = trunk;
 	ifp = ifv->ifv_ifp;
 	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
-	ifv->ifv_ifp->if_baudrate = p->if_baudrate;
+	ifp->if_baudrate = p->if_baudrate;
 	/*
 	 * Copy only a selected subset of flags from the parent.
 	 * Other flags are none of our business.
@@ -729,33 +1044,13 @@
 
 	ifp->if_link_state = p->if_link_state;
 
-#if 0
-	/*
-	 * Not ready yet.  We need notification from the parent
-	 * when hw checksumming flags in its if_capenable change.
-	 * Flags set in if_capabilities only are useless.
-	 */
-	/*
-	 * If the parent interface can do hardware-assisted
-	 * VLAN encapsulation, then propagate its hardware-
-	 * assisted checksumming flags.
-	 */
-	if (p->if_capabilities & IFCAP_VLAN_HWTAGGING)
-		ifp->if_capabilities |= p->if_capabilities & IFCAP_HWCSUM;
-#endif
+	vlan_capabilities(ifv);
 
 	/*
 	 * Set up our ``Ethernet address'' to reflect the underlying
 	 * physical interface's.
 	 */
-	ifa1 = ifaddr_byindex(ifp->if_index);
-	ifa2 = ifaddr_byindex(p->if_index);
-	sdl1 = (struct sockaddr_dl *)ifa1->ifa_addr;
-	sdl2 = (struct sockaddr_dl *)ifa2->ifa_addr;
-	sdl1->sdl_type = IFT_ETHER;
-	sdl1->sdl_alen = ETHER_ADDR_LEN;
-	bcopy(LLADDR(sdl2), LLADDR(sdl1), ETHER_ADDR_LEN);
-	bcopy(LLADDR(sdl2), IFP2ENADDR(ifp), ETHER_ADDR_LEN);
+	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), ETHER_ADDR_LEN);
 
 	/*
 	 * Configure multicast addresses that may already be
@@ -763,26 +1058,44 @@
 	 */
 	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
 
-	return (0);
+	/* We are ready for operation now. */
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+done:
+	TRUNK_UNLOCK(trunk);
+	VLAN_UNLOCK();
+
+	return (error);
 }
 
 static int
 vlan_unconfig(struct ifnet *ifp)
 {
-	struct ifaddr *ifa;
-	struct sockaddr_dl *sdl;
+	int ret;
+
+	VLAN_LOCK();
+	ret = vlan_unconfig_locked(ifp);
+	VLAN_UNLOCK();
+	return (ret);
+}
+
+static int
+vlan_unconfig_locked(struct ifnet *ifp)
+{
+	struct ifvlantrunk *trunk;
 	struct vlan_mc_entry *mc;
 	struct ifvlan *ifv;
-	struct ifnet *p;
 	int error;
 
 	VLAN_LOCK_ASSERT();
 
 	ifv = ifp->if_softc;
-	p = ifv->ifv_p;
+	trunk = ifv->ifv_trunk;
 
-	if (p) {
+	if (trunk) {
 		struct sockaddr_dl sdl;
+		struct ifnet *p = trunk->parent;
+
+		TRUNK_LOCK(trunk);
 
 		/*
 		 * Since the interface is being unconfigured, we need to
@@ -796,8 +1109,7 @@
 		sdl.sdl_type = IFT_ETHER;
 		sdl.sdl_alen = ETHER_ADDR_LEN;
 
-		while(SLIST_FIRST(&ifv->vlan_mc_listhead) != NULL) {
-			mc = SLIST_FIRST(&ifv->vlan_mc_listhead);
+		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
 			bcopy((char *)&mc->mc_addr, LLADDR(&sdl),
 			    ETHER_ADDR_LEN);
 			error = if_delmulti(p, (struct sockaddr *)&sdl);
@@ -808,23 +1120,38 @@
 		}
 
 		vlan_setflags(ifp, 0); /* clear special flags on parent */
-		p->if_nvlans--;
+#ifdef VLAN_ARRAY
+		trunk->vlans[ifv->ifv_tag] = NULL;
+		trunk->refcnt--;
+#else
+		vlan_remhash(trunk, ifv);
+#endif
+		ifv->ifv_trunk = NULL;
+
+		/*
+		 * Check if we were the last.
+		 */
+		if (trunk->refcnt == 0) {
+			trunk->parent->if_vlantrunk = NULL;
+			/*
+			 * XXXGL: If some ithread has already entered
+			 * vlan_input() and is now blocked on the trunk
+			 * lock, then it should preempt us right after
+			 * unlock and finish its work. Then we will acquire
+			 * lock again in trunk_destroy().
+			 */
+			TRUNK_UNLOCK(trunk);
+			trunk_destroy(trunk);
+		} else
+			TRUNK_UNLOCK(trunk);
 	}
 
 	/* Disconnect from parent. */
 	if (ifv->ifv_pflags)
 		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
-	ifv->ifv_p = NULL;
-	ifv->ifv_ifp->if_mtu = ETHERMTU;		/* XXX why not 0? */
-	ifv->ifv_ifp->if_link_state = LINK_STATE_UNKNOWN;
-
-	/* Clear our MAC address. */
-	ifa = ifaddr_byindex(ifv->ifv_ifp->if_index);
-	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
-	sdl->sdl_type = IFT_ETHER;
-	sdl->sdl_alen = ETHER_ADDR_LEN;
-	bzero(LLADDR(sdl), ETHER_ADDR_LEN);
-	bzero(IFP2ENADDR(ifv->ifv_ifp), ETHER_ADDR_LEN);
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_link_state = LINK_STATE_UNKNOWN;
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	return (0);
 }
@@ -853,7 +1180,7 @@
 	 * in accord with actual parent's flags.
 	 */
 	if (status != (ifv->ifv_pflags & flag)) {
-		error = (*func)(ifv->ifv_p, status);
+		error = (*func)(PARENT(ifv), status);
 		if (error)
 			return (error);
 		ifv->ifv_pflags &= ~flag;
@@ -885,15 +1212,72 @@
 static void
 vlan_link_state(struct ifnet *ifp, int link)
 {
+	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
 	struct ifvlan *ifv;
+	int i;
 
-	VLAN_LOCK();
-	LIST_FOREACH(ifv, &ifv_list, ifv_list) {
-		if (ifv->ifv_p == ifp)
+	TRUNK_LOCK(trunk);
+#ifdef VLAN_ARRAY
+	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
+		if (trunk->vlans[i] != NULL) {
+			ifv = trunk->vlans[i];
+#else
+	for (i = 0; i < (1 << trunk->hwidth); i++)
+		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
+#endif
+			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
 			if_link_state_change(ifv->ifv_ifp,
-			    ifv->ifv_p->if_link_state);
+			    trunk->parent->if_link_state);
+		}
+	TRUNK_UNLOCK(trunk);
+}
+
+static void
+vlan_capabilities(struct ifvlan *ifv)
+{
+	struct ifnet *p = PARENT(ifv);
+	struct ifnet *ifp = ifv->ifv_ifp;
+
+	TRUNK_LOCK_ASSERT(TRUNK(ifv));
+
+	/*
+	 * If the parent interface can do checksum offloading
+	 * on VLANs, then propagate its hardware-assisted
+	 * checksumming flags. Also assert that checksum
+	 * offloading requires hardware VLAN tagging.
+	 */
+	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
+		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
+
+	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
+	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
+		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
+		ifp->if_hwassist = p->if_hwassist;
+	} else {
+		ifp->if_capenable = 0;
+		ifp->if_hwassist = 0;
 	}
-	VLAN_UNLOCK();
+}
+
+static void
+vlan_trunk_capabilities(struct ifnet *ifp)
+{
+	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
+	struct ifvlan *ifv;
+	int i;
+
+	TRUNK_LOCK(trunk);
+#ifdef VLAN_ARRAY
+	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
+		if (trunk->vlans[i] != NULL) {
+			ifv = trunk->vlans[i];
+#else
+	for (i = 0; i < (1 << trunk->hwidth); i++) {
+		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
+#endif
+			vlan_capabilities(ifv);
+	}
+	TRUNK_UNLOCK(trunk);
 }
 
 static int
@@ -911,34 +1295,10 @@
 	ifv = ifp->if_softc;
 
 	switch (cmd) {
-	case SIOCSIFADDR:
-		ifp->if_flags |= IFF_UP;
-
-		switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
-		case AF_INET:
-			arp_ifinit(ifv->ifv_ifp, ifa);
-			break;
-#endif
-		default:
-			break;
-		}
-		break;
-
-	case SIOCGIFADDR:
-		{
-			struct sockaddr *sa;
-
-			sa = (struct sockaddr *) &ifr->ifr_data;
-			bcopy(IFP2ENADDR(ifp), (caddr_t)sa->sa_data,
-			    ETHER_ADDR_LEN);
-		}
-		break;
-
 	case SIOCGIFMEDIA:
 		VLAN_LOCK();
-		if (ifv->ifv_p != NULL) {
-			error = (*ifv->ifv_p->if_ioctl)(ifv->ifv_p,
+		if (TRUNK(ifv) != NULL) {
+			error = (*PARENT(ifv)->if_ioctl)(PARENT(ifv),
 					SIOCGIFMEDIA, data);
 			VLAN_UNLOCK();
 			/* Limit the result to the parent's current config. */
@@ -968,9 +1328,9 @@
 		 * Set the interface MTU.
 		 */
 		VLAN_LOCK();
-		if (ifv->ifv_p != NULL) {
+		if (TRUNK(ifv) != NULL) {
 			if (ifr->ifr_mtu >
-			     (ifv->ifv_p->if_mtu - ifv->ifv_mtufudge) ||
+			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
 			    ifr->ifr_mtu <
 			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
 				error = EINVAL;
@@ -986,10 +1346,7 @@
 		if (error)
 			break;
 		if (vlr.vlr_parent[0] == '\0') {
-			VLAN_LOCK();
 			vlan_unconfig(ifp);
-			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-			VLAN_UNLOCK();
 			break;
 		}
 		p = ifunit(vlr.vlr_parent);
@@ -1005,15 +1362,9 @@
 			error = EINVAL;
 			break;
 		}
-		VLAN_LOCK();
-		error = vlan_config(ifv, p);
-		if (error) {
-			VLAN_UNLOCK();
+		error = vlan_config(ifv, p, vlr.vlr_tag);
+		if (error)
 			break;
-		}
-		ifv->ifv_tag = vlr.vlr_tag;
-		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-		VLAN_UNLOCK();
 
 		/* Update flags on the parent, if necessary. */
 		vlan_setflags(ifp, 1);
@@ -1022,8 +1373,8 @@
 	case SIOCGETVLAN:
 		bzero(&vlr, sizeof(vlr));
 		VLAN_LOCK();
-		if (ifv->ifv_p) {
-			strlcpy(vlr.vlr_parent, ifv->ifv_p->if_xname,
+		if (TRUNK(ifv) != NULL) {
+			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
 			    sizeof(vlr.vlr_parent));
 			vlr.vlr_tag = ifv->ifv_tag;
 		}
@@ -1036,18 +1387,22 @@
 		 * We should propagate selected flags to the parent,
 		 * e.g., promiscuous mode.
 		 */
-		if (ifv->ifv_p != NULL)
+		if (TRUNK(ifv) != NULL)
 			error = vlan_setflags(ifp, 1);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
-		/*VLAN_LOCK();*/
-		error = vlan_setmulti(ifp);
-		/*VLAN_UNLOCK();*/
+		/*
+		 * If we don't have a parent, just remember the membership for
+		 * when we do.
+		 */
+		if (TRUNK(ifv) != NULL)
+			error = vlan_setmulti(ifp);
 		break;
+
 	default:
-		error = EINVAL;
+		error = ether_ioctl(ifp, cmd, data);
 	}
 
 	return (error);
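
The per-trunk hash replaces the old single global vlan list: with the default VLAN_DEF_HWIDTH of 4 there are 16 buckets, vlan_inshash() doubles the table once more than 16*16/2 = 128 vlans are already attached, and vlan_remhash() lets it shrink back toward the default, so, per the comment in vlan_inshash(), the average chain length stays around buckets/2. A small standalone example of the bucket computation, using the HASH macro exactly as defined above:

/*
 * Worked example of the vlan trunk hash: fold a 12-bit VLAN ID onto
 * the bucket mask the same way vlan_gethash() does.
 */
#include <stdint.h>
#include <stdio.h>

#define	HASH(n, m)	((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m))

int
main(void)
{
	uint16_t tag = 100;			/* example VLAN ID */
	uint16_t hmask = (1 << 4) - 1;		/* 16 buckets, default width */

	printf("vlan %u -> bucket %u of %u\n", tag, HASH(tag, hmask),
	    hmask + 1);
	return (0);
}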
--- /dev/null
+++ sys/net/bridgestp.h
@@ -0,0 +1,396 @@
+/*	$NetBSD: if_bridgevar.h,v 1.4 2003/07/08 07:13:50 itojun Exp $	*/
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed for the NetBSD Project by
+ *	Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason at thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
+ *
+ * $FreeBSD: src/sys/net/bridgestp.h,v 1.14 2007/08/04 21:09:04 thompsa Exp $
+ */
+
+/*
+ * Data structure and control definitions for STP interfaces.
+ */
+
+#include <sys/callout.h>
+#include <sys/queue.h>
+
+/* STP port states */
+#define	BSTP_IFSTATE_DISABLED	0
+#define	BSTP_IFSTATE_LISTENING	1
+#define	BSTP_IFSTATE_LEARNING	2
+#define	BSTP_IFSTATE_FORWARDING	3
+#define	BSTP_IFSTATE_BLOCKING	4
+#define	BSTP_IFSTATE_DISCARDING	5
+
+#define	BSTP_TCSTATE_ACTIVE	1
+#define	BSTP_TCSTATE_DETECTED	2
+#define	BSTP_TCSTATE_INACTIVE	3
+#define	BSTP_TCSTATE_LEARNING	4
+#define	BSTP_TCSTATE_PROPAG	5
+#define	BSTP_TCSTATE_ACK	6
+#define	BSTP_TCSTATE_TC		7
+#define	BSTP_TCSTATE_TCN	8
+
+#define	BSTP_ROLE_DISABLED	0
+#define	BSTP_ROLE_ROOT		1
+#define	BSTP_ROLE_DESIGNATED	2
+#define	BSTP_ROLE_ALTERNATE	3
+#define	BSTP_ROLE_BACKUP	4
+
+#ifdef _KERNEL
+
+/* STP port flags */
+#define	BSTP_PORT_CANMIGRATE	0x0001
+#define	BSTP_PORT_NEWINFO	0x0002
+#define	BSTP_PORT_DISPUTED	0x0004
+#define	BSTP_PORT_ADMCOST	0x0008
+#define	BSTP_PORT_AUTOEDGE	0x0010
+#define	BSTP_PORT_AUTOPTP	0x0020
+#define	BSTP_PORT_ADMEDGE	0x0040
+#define	BSTP_PORT_PNDCOST	0x0080
+
+/* BPDU priority */
+#define	BSTP_PDU_SUPERIOR	1
+#define	BSTP_PDU_REPEATED	2
+#define	BSTP_PDU_INFERIOR	3
+#define	BSTP_PDU_INFERIORALT	4
+#define	BSTP_PDU_OTHER		5
+
+/* BPDU flags */
+#define	BSTP_PDU_PRMASK		0x0c		/* Port Role */
+#define	BSTP_PDU_PRSHIFT	2		/* Port Role offset */
+#define	BSTP_PDU_F_UNKN		0x00		/* Unknown port    (00) */
+#define	BSTP_PDU_F_ALT		0x01		/* Alt/Backup port (01) */
+#define	BSTP_PDU_F_ROOT		0x02		/* Root port       (10) */
+#define	BSTP_PDU_F_DESG		0x03		/* Designated port (11) */
+
+#define	BSTP_PDU_STPMASK	0x81		/* strip unused STP flags */
+#define	BSTP_PDU_RSTPMASK	0x7f		/* strip unused RSTP flags */
+#define	BSTP_PDU_F_TC		0x01		/* Topology change */
+#define	BSTP_PDU_F_P		0x02		/* Proposal flag */
+#define	BSTP_PDU_F_L		0x10		/* Learning flag */
+#define	BSTP_PDU_F_F		0x20		/* Forwarding flag */
+#define	BSTP_PDU_F_A		0x40		/* Agreement flag */
+#define	BSTP_PDU_F_TCA		0x80		/* Topology change ack */
+
+/*
+ * Spanning tree defaults.
+ */
+#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
+#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
+#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
+#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
+#define	BSTP_DEFAULT_MIGRATE_DELAY	(3 * 256)
+#define	BSTP_DEFAULT_HOLD_COUNT		6
+#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
+#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
+#define	BSTP_DEFAULT_PATH_COST		55
+#define	BSTP_MIN_HELLO_TIME		(1 * 256)
+#define	BSTP_MIN_MAX_AGE		(6 * 256)
+#define	BSTP_MIN_FORWARD_DELAY		(4 * 256)
+#define	BSTP_MIN_HOLD_COUNT		1
+#define	BSTP_MAX_HELLO_TIME		(2 * 256)
+#define	BSTP_MAX_MAX_AGE		(40 * 256)
+#define	BSTP_MAX_FORWARD_DELAY		(30 * 256)
+#define	BSTP_MAX_HOLD_COUNT		10
+#define	BSTP_MAX_PRIORITY		61440
+#define	BSTP_MAX_PORT_PRIORITY		240
+#define	BSTP_MAX_PATH_COST		200000000
+
+/* BPDU message types */
+#define	BSTP_MSGTYPE_CFG	0x00		/* Configuration */
+#define	BSTP_MSGTYPE_RSTP	0x02		/* Rapid STP */
+#define	BSTP_MSGTYPE_TCN	0x80		/* Topology chg notification */
+
+/* Protocol versions */
+#define	BSTP_PROTO_ID		0x00
+#define	BSTP_PROTO_STP		0x00
+#define	BSTP_PROTO_RSTP		0x02
+#define	BSTP_PROTO_MAX		BSTP_PROTO_RSTP
+
+#define	BSTP_INFO_RECIEVED	1
+#define	BSTP_INFO_MINE		2
+#define	BSTP_INFO_AGED		3
+#define	BSTP_INFO_DISABLED	4
+
+
+#define	BSTP_MESSAGE_AGE_INCR	(1 * 256)	/* in 256ths of a second */
+#define	BSTP_TICK_VAL		(1 * 256)	/* in 256ths of a second */
+#define	BSTP_LINK_TIMER		(BSTP_TICK_VAL * 15)
+
+/*
+ * Driver callbacks for STP state changes
+ */
+typedef void (*bstp_state_cb_t)(struct ifnet *, int);
+typedef void (*bstp_rtage_cb_t)(struct ifnet *, int);
+struct bstp_cb_ops {
+	bstp_state_cb_t	bcb_state;
+	bstp_rtage_cb_t	bcb_rtage;
+};
+
+/*
+ * Because BPDU's do not make nicely aligned structures, two different
+ * declarations are used: bstp_?bpdu (wire representation, packed) and
+ * bstp_*_unit (internal, nicely aligned version).
+ */
+
+/* configuration bridge protocol data unit */
+struct bstp_cbpdu {
+	uint8_t		cbu_dsap;		/* LLC: destination sap */
+	uint8_t		cbu_ssap;		/* LLC: source sap */
+	uint8_t		cbu_ctl;		/* LLC: control */
+	uint16_t	cbu_protoid;		/* protocol id */
+	uint8_t		cbu_protover;		/* protocol version */
+	uint8_t		cbu_bpdutype;		/* message type */
+	uint8_t		cbu_flags;		/* flags (below) */
+
+	/* root id */
+	uint16_t	cbu_rootpri;		/* root priority */
+	uint8_t		cbu_rootaddr[6];	/* root address */
+
+	uint32_t	cbu_rootpathcost;	/* root path cost */
+
+	/* bridge id */
+	uint16_t	cbu_bridgepri;		/* bridge priority */
+	uint8_t		cbu_bridgeaddr[6];	/* bridge address */
+
+	uint16_t	cbu_portid;		/* port id */
+	uint16_t	cbu_messageage;		/* current message age */
+	uint16_t	cbu_maxage;		/* maximum age */
+	uint16_t	cbu_hellotime;		/* hello time */
+	uint16_t	cbu_forwarddelay;	/* forwarding delay */
+	uint8_t		cbu_versionlen;		/* version 1 length */
+} __packed;
+#define	BSTP_BPDU_STP_LEN	(3 + 35)	/* LLC + STP pdu */
+#define	BSTP_BPDU_RSTP_LEN	(3 + 36)	/* LLC + RSTP pdu */
+
+/* topology change notification bridge protocol data unit */
+struct bstp_tbpdu {
+	uint8_t		tbu_dsap;		/* LLC: destination sap */
+	uint8_t		tbu_ssap;		/* LLC: source sap */
+	uint8_t		tbu_ctl;		/* LLC: control */
+	uint16_t	tbu_protoid;		/* protocol id */
+	uint8_t		tbu_protover;		/* protocol version */
+	uint8_t		tbu_bpdutype;		/* message type */
+} __packed;
+
+/*
+ * Timekeeping structure used in spanning tree code.
+ */
+struct bstp_timer {
+	int		active;
+	int		latched;
+	int		value;
+};
+
+struct bstp_pri_vector {
+	uint64_t		pv_root_id;
+	uint32_t		pv_cost;
+	uint64_t		pv_dbridge_id;
+	uint16_t		pv_dport_id;
+	uint16_t		pv_port_id;
+};
+
+struct bstp_config_unit {
+	struct bstp_pri_vector	cu_pv;
+	uint16_t	cu_message_age;
+	uint16_t	cu_max_age;
+	uint16_t	cu_forward_delay;
+	uint16_t	cu_hello_time;
+	uint8_t		cu_message_type;
+	uint8_t		cu_topology_change_ack;
+	uint8_t		cu_topology_change;
+	uint8_t		cu_proposal;
+	uint8_t		cu_agree;
+	uint8_t		cu_learning;
+	uint8_t		cu_forwarding;
+	uint8_t		cu_role;
+};
+
+struct bstp_tcn_unit {
+	uint8_t		tu_message_type;
+};
+
+struct bstp_port {
+	LIST_ENTRY(bstp_port)	bp_next;
+	struct ifnet		*bp_ifp;	/* parent if */
+	struct bstp_state	*bp_bs;
+	uint8_t			bp_active;
+	uint8_t			bp_protover;
+	uint32_t		bp_flags;
+	uint32_t		bp_path_cost;
+	uint16_t		bp_port_msg_age;
+	uint16_t		bp_port_max_age;
+	uint16_t		bp_port_fdelay;
+	uint16_t		bp_port_htime;
+	uint16_t		bp_desg_msg_age;
+	uint16_t		bp_desg_max_age;
+	uint16_t		bp_desg_fdelay;
+	uint16_t		bp_desg_htime;
+	struct bstp_timer	bp_edge_delay_timer;
+	struct bstp_timer	bp_forward_delay_timer;
+	struct bstp_timer	bp_hello_timer;
+	struct bstp_timer	bp_message_age_timer;
+	struct bstp_timer	bp_migrate_delay_timer;
+	struct bstp_timer	bp_recent_backup_timer;
+	struct bstp_timer	bp_recent_root_timer;
+	struct bstp_timer	bp_tc_timer;
+	struct bstp_config_unit bp_msg_cu;
+	struct bstp_pri_vector	bp_desg_pv;
+	struct bstp_pri_vector	bp_port_pv;
+	uint16_t		bp_port_id;
+	uint8_t			bp_state;
+	uint8_t			bp_tcstate;
+	uint8_t			bp_role;
+	uint8_t			bp_infois;
+	uint8_t			bp_tc_ack;
+	uint8_t			bp_tc_prop;
+	uint8_t			bp_fdbflush;
+	uint8_t			bp_priority;
+	uint8_t			bp_ptp_link;
+	uint8_t			bp_agree;
+	uint8_t			bp_agreed;
+	uint8_t			bp_sync;
+	uint8_t			bp_synced;
+	uint8_t			bp_proposing;
+	uint8_t			bp_proposed;
+	uint8_t			bp_operedge;
+	uint8_t			bp_reroot;
+	uint8_t			bp_rcvdtc;
+	uint8_t			bp_rcvdtca;
+	uint8_t			bp_rcvdtcn;
+	uint32_t		bp_forward_transitions;
+	uint8_t			bp_txcount;
+	struct task		bp_statetask;
+	struct task		bp_rtagetask;
+};
+
+/*
+ * Software state for each bridge STP.
+ */
+struct bstp_state {
+	LIST_ENTRY(bstp_state)	bs_list;
+	uint8_t			bs_running;
+	struct mtx		bs_mtx;
+	struct bstp_pri_vector	bs_bridge_pv;
+	struct bstp_pri_vector	bs_root_pv;
+	struct bstp_port	*bs_root_port;
+	uint8_t			bs_protover;
+	uint16_t		bs_migration_delay;
+	uint16_t		bs_edge_delay;
+	uint16_t		bs_bridge_max_age;
+	uint16_t		bs_bridge_fdelay;
+	uint16_t		bs_bridge_htime;
+	uint16_t		bs_root_msg_age;
+	uint16_t		bs_root_max_age;
+	uint16_t		bs_root_fdelay;
+	uint16_t		bs_root_htime;
+	uint16_t		bs_hold_time;
+	uint16_t		bs_bridge_priority;
+	uint8_t			bs_txholdcount;
+	uint8_t			bs_allsynced;
+	struct callout		bs_bstpcallout;	/* STP callout */
+	struct bstp_timer	bs_link_timer;
+	struct timeval		bs_last_tc_time;
+	LIST_HEAD(, bstp_port)	bs_bplist;
+	bstp_state_cb_t		bs_state_cb;
+	bstp_rtage_cb_t		bs_rtage_cb;
+};
+
+#define	BSTP_LOCK_INIT(_bs)	mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF)
+#define	BSTP_LOCK_DESTROY(_bs)	mtx_destroy(&(_bs)->bs_mtx)
+#define	BSTP_LOCK(_bs)		mtx_lock(&(_bs)->bs_mtx)
+#define	BSTP_UNLOCK(_bs)	mtx_unlock(&(_bs)->bs_mtx)
+#define	BSTP_LOCK_ASSERT(_bs)	mtx_assert(&(_bs)->bs_mtx, MA_OWNED)
+
+extern const uint8_t bstp_etheraddr[];
+
+extern	void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
+
+void	bstp_attach(struct bstp_state *, struct bstp_cb_ops *);
+void	bstp_detach(struct bstp_state *);
+void	bstp_init(struct bstp_state *);
+void	bstp_stop(struct bstp_state *);
+int	bstp_create(struct bstp_state *, struct bstp_port *, struct ifnet *);
+int	bstp_enable(struct bstp_port *);
+void	bstp_disable(struct bstp_port *);
+void	bstp_destroy(struct bstp_port *);
+void	bstp_linkstate(struct ifnet *, int);
+int	bstp_set_htime(struct bstp_state *, int);
+int	bstp_set_fdelay(struct bstp_state *, int);
+int	bstp_set_maxage(struct bstp_state *, int);
+int	bstp_set_holdcount(struct bstp_state *, int);
+int	bstp_set_protocol(struct bstp_state *, int);
+int	bstp_set_priority(struct bstp_state *, int);
+int	bstp_set_port_priority(struct bstp_port *, int);
+int	bstp_set_path_cost(struct bstp_port *, uint32_t);
+int	bstp_set_edge(struct bstp_port *, int);
+int	bstp_set_autoedge(struct bstp_port *, int);
+int	bstp_set_ptp(struct bstp_port *, int);
+int	bstp_set_autoptp(struct bstp_port *, int);
+struct mbuf *bstp_input(struct bstp_port *, struct ifnet *, struct mbuf *);
+
+#endif /* _KERNEL */
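The header wires the protocol to its consumer through bstp_cb_ops: bcb_state is invoked when a port's STP state changes and bcb_rtage when learned addresses behind a port should be aged out after a topology change, both deferred through the per-port tasks declared in struct bstp_port. A minimal userland sketch of that callback pattern, using stand-in types rather than the real kernel structures:

#include <stdio.h>

/*
 * Stand-in types so the sketch compiles in userland; the kernel side uses
 * the real struct ifnet and the bstp_cb_ops declared above.
 */
struct ifnet {
	const char *if_xname;
};

typedef void (*state_cb_t)(struct ifnet *, int);
typedef void (*rtage_cb_t)(struct ifnet *, int);

struct cb_ops {
	state_cb_t	bcb_state;
	rtage_cb_t	bcb_rtage;
};

/* What a bridge-like consumer would register. */
static void
consumer_state_change(struct ifnet *ifp, int state)
{
	printf("%s: STP port state is now %d\n", ifp->if_xname, state);
}

static void
consumer_rtable_expire(struct ifnet *ifp, int age)
{
	printf("%s: flush addresses older than %d seconds\n",
	    ifp->if_xname, age);
}

int
main(void)
{
	struct ifnet port = { "em0" };
	struct cb_ops ops = { consumer_state_change, consumer_rtable_expire };

	/*
	 * bstp_attach() stores the callbacks; the state machine later
	 * dispatches them through the per-port tasks (bp_statetask and
	 * bp_rtagetask) rather than calling the driver directly from the
	 * timer and packet paths.
	 */
	ops.bcb_state(&port, 0);	/* a BSTP_IFSTATE_* value */
	ops.bcb_rtage(&port, 1);
	return (0);
}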
Index: iso88025.h
===================================================================
RCS file: /home/cvs/src/sys/net/iso88025.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/iso88025.h -L sys/net/iso88025.h -u -r1.1.1.1 -r1.2
--- sys/net/iso88025.h
+++ sys/net/iso88025.h
@@ -30,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/iso88025.h,v 1.15 2005/04/13 08:14:14 mdodd Exp $
+ * $FreeBSD: src/sys/net/iso88025.h,v 1.16 2005/11/11 07:36:14 ru Exp $
  *
  * Information gathered from tokenring at freebsd, /sys/net/ethernet.h and
  * the Mach token ring driver.
@@ -162,7 +162,7 @@
 #define	ISO88025_BPF_UNSUPPORTED	0
 #define	ISO88025_BPF_SUPPORTED		1
 
-void	iso88025_ifattach	(struct ifnet *, int);
+void	iso88025_ifattach	(struct ifnet *, const u_int8_t *, int);
 void	iso88025_ifdetach	(struct ifnet *, int);
 int	iso88025_ioctl		(struct ifnet *, int , caddr_t );
 int	iso88025_output		(struct ifnet *, struct mbuf *, struct sockaddr *,
Index: bridgestp.c
===================================================================
RCS file: /home/cvs/src/sys/net/bridgestp.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/bridgestp.c -L sys/net/bridgestp.c -u -r1.2 -r1.3
--- sys/net/bridgestp.c
+++ sys/net/bridgestp.c
@@ -2,6 +2,7 @@
 
 /*
  * Copyright (c) 2000 Jason L. Wright (jason at thought.net)
+ * Copyright (c) 2006 Andrew Thompson (thompsa at FreeBSD.org)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -12,11 +13,6 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *      This product includes software developed by Jason L. Wright
- * 4. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -35,12 +31,11 @@
 
 /*
  * Implementation of the spanning tree protocol as defined in
- * ISO/IEC Final DIS 15802-3 (IEEE P802.1D/D17), May 25, 1998.
- * (In English: IEEE 802.1D, Draft 17, 1998)
+ * ISO/IEC 802.1D-2004, June 9, 2004.
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/net/bridgestp.c,v 1.3.2.8 2006/03/09 08:21:19 thompsa Exp $");
+__FBSDID("$FreeBSD: src/sys/net/bridgestp.c,v 1.39 2007/08/18 12:06:13 thompsa Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -49,9 +44,11 @@
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #include <sys/callout.h>
+#include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/taskqueue.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
@@ -63,324 +60,176 @@
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
-#include <net/if_bridgevar.h>
+#include <net/bridgestp.h>
 
-/* BPDU message types */
-#define	BSTP_MSGTYPE_CFG	0x00		/* Configuration */
-#define	BSTP_MSGTYPE_TCN	0x80		/* Topology chg notification */
-
-/* BPDU flags */
-#define	BSTP_FLAG_TC		0x01		/* Topology change */
-#define	BSTP_FLAG_TCA		0x80		/* Topology change ack */
-
-#define	BSTP_MESSAGE_AGE_INCR	(1 * 256)	/* in 256ths of a second */
-#define	BSTP_TICK_VAL		(1 * 256)	/* in 256ths of a second */
-#define	BSTP_LINK_TIMER		(BSTP_TICK_VAL * 15)
-
-/*
- * Because BPDU's do not make nicely aligned structures, two different
- * declarations are used: bstp_?bpdu (wire representation, packed) and
- * bstp_*_unit (internal, nicely aligned version).
- */
-
-/* configuration bridge protocol data unit */
-struct bstp_cbpdu {
-	uint8_t		cbu_dsap;		/* LLC: destination sap */
-	uint8_t		cbu_ssap;		/* LLC: source sap */
-	uint8_t		cbu_ctl;		/* LLC: control */
-	uint16_t	cbu_protoid;		/* protocol id */
-	uint8_t		cbu_protover;		/* protocol version */
-	uint8_t		cbu_bpdutype;		/* message type */
-	uint8_t		cbu_flags;		/* flags (below) */
-
-	/* root id */
-	uint16_t	cbu_rootpri;		/* root priority */
-	uint8_t	cbu_rootaddr[6];	/* root address */
-
-	uint32_t	cbu_rootpathcost;	/* root path cost */
-
-	/* bridge id */
-	uint16_t	cbu_bridgepri;		/* bridge priority */
-	uint8_t		cbu_bridgeaddr[6];	/* bridge address */
-
-	uint16_t	cbu_portid;		/* port id */
-	uint16_t	cbu_messageage;		/* current message age */
-	uint16_t	cbu_maxage;		/* maximum age */
-	uint16_t	cbu_hellotime;		/* hello time */
-	uint16_t	cbu_forwarddelay;	/* forwarding delay */
-} __attribute__((__packed__));
-
-/* topology change notification bridge protocol data unit */
-struct bstp_tbpdu {
-	uint8_t		tbu_dsap;		/* LLC: destination sap */
-	uint8_t		tbu_ssap;		/* LLC: source sap */
-	uint8_t		tbu_ctl;		/* LLC: control */
-	uint16_t	tbu_protoid;		/* protocol id */
-	uint8_t		tbu_protover;		/* protocol version */
-	uint8_t		tbu_bpdutype;		/* message type */
-} __attribute__((__packed__));
+#ifdef	BRIDGESTP_DEBUG
+#define	DPRINTF(fmt, arg...)	printf("bstp: " fmt, ##arg)
+#else
+#define	DPRINTF(fmt, arg...)
+#endif
+
+#define	PV2ADDR(pv, eaddr)	do {		\
+	eaddr[0] = pv >> 40;			\
+	eaddr[1] = pv >> 32;			\
+	eaddr[2] = pv >> 24;			\
+	eaddr[3] = pv >> 16;			\
+	eaddr[4] = pv >> 8;			\
+	eaddr[5] = pv >> 0;			\
+} while (0)
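PV2ADDR() recovers the 48-bit MAC address from a 64-bit bridge or root identifier whose upper 16 bits carry the priority; bstp_decode_bpdu() below performs the inverse packing. A small userland round trip with a made-up address:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same layout as the kernel macro: the low 48 bits are the MAC address. */
#define	PV2ADDR(pv, eaddr) do {		\
	eaddr[0] = (pv) >> 40;		\
	eaddr[1] = (pv) >> 32;		\
	eaddr[2] = (pv) >> 24;		\
	eaddr[3] = (pv) >> 16;		\
	eaddr[4] = (pv) >> 8;		\
	eaddr[5] = (pv) >> 0;		\
} while (0)

int
main(void)
{
	const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint16_t prio = 0x8000;		/* example bridge priority */
	uint64_t id = (uint64_t)prio << 48;
	uint8_t out[6];
	int i;

	for (i = 0; i < 6; i++)
		id |= (uint64_t)mac[i] << (40 - 8 * i);

	PV2ADDR(id, out);
	for (i = 0; i < 6; i++)
		assert(out[i] == mac[i]);
	printf("bridge id = 0x%016llx\n", (unsigned long long)id);
	return (0);
}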
+
+#define	INFO_BETTER	1
+#define	INFO_SAME	0
+#define	INFO_WORSE	-1
 
 const uint8_t bstp_etheraddr[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
-static void	bstp_initialize_port(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_ifupdstatus(struct bridge_softc *, struct bridge_iflist *);
-static void	bstp_enable_port(struct bridge_softc *, struct bridge_iflist *);
-static void	bstp_disable_port(struct bridge_softc *,
-		    struct bridge_iflist *);
-#ifdef notused
-static void	bstp_enable_change_detection(struct bridge_iflist *);
-static void	bstp_disable_change_detection(struct bridge_iflist *);
-#endif /* notused */
-static int	bstp_root_bridge(struct bridge_softc *sc);
-static int	bstp_supersedes_port_info(struct bridge_softc *,
-		    struct bridge_iflist *, struct bstp_config_unit *);
-static int	bstp_designated_port(struct bridge_softc *,
-		    struct bridge_iflist *);
-static int	bstp_designated_for_some_port(struct bridge_softc *);
-static void	bstp_transmit_config(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_transmit_tcn(struct bridge_softc *);
-static void	bstp_received_config_bpdu(struct bridge_softc *,
-		    struct bridge_iflist *, struct bstp_config_unit *);
-static void	bstp_received_tcn_bpdu(struct bridge_softc *,
-		    struct bridge_iflist *, struct bstp_tcn_unit *);
-static void	bstp_record_config_information(struct bridge_softc *,
-		    struct bridge_iflist *, struct bstp_config_unit *);
-static void	bstp_record_config_timeout_values(struct bridge_softc *,
-		    struct bstp_config_unit *);
-static void	bstp_config_bpdu_generation(struct bridge_softc *);
-static void	bstp_send_config_bpdu(struct bridge_softc *,
-		    struct bridge_iflist *, struct bstp_config_unit *);
-static void	bstp_configuration_update(struct bridge_softc *);
-static void	bstp_root_selection(struct bridge_softc *);
-static void	bstp_designated_port_selection(struct bridge_softc *);
-static void	bstp_become_designated_port(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_port_state_selection(struct bridge_softc *);
-static void	bstp_make_forwarding(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_make_blocking(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_set_port_state(struct bridge_iflist *, uint8_t);
-#ifdef notused
-static void	bstp_set_bridge_priority(struct bridge_softc *, uint64_t);
-static void	bstp_set_port_priority(struct bridge_softc *,
-		    struct bridge_iflist *, uint16_t);
-static void	bstp_set_path_cost(struct bridge_softc *,
-		    struct bridge_iflist *, uint32_t);
-#endif /* notused */
-static void	bstp_topology_change_detection(struct bridge_softc *);
-static void	bstp_topology_change_acknowledged(struct bridge_softc *);
-static void	bstp_acknowledge_topology_change(struct bridge_softc *,
-		    struct bridge_iflist *);
+LIST_HEAD(, bstp_state) bstp_list;
+static struct mtx	bstp_list_mtx;
 
+static void	bstp_transmit(struct bstp_state *, struct bstp_port *);
+static void	bstp_transmit_bpdu(struct bstp_state *, struct bstp_port *);
+static void	bstp_transmit_tcn(struct bstp_state *, struct bstp_port *);
+static void	bstp_decode_bpdu(struct bstp_port *, struct bstp_cbpdu *,
+		    struct bstp_config_unit *);
+static void	bstp_send_bpdu(struct bstp_state *, struct bstp_port *,
+		    struct bstp_cbpdu *);
+static void	bstp_enqueue(struct ifnet *, struct mbuf *);
+static int	bstp_pdu_flags(struct bstp_port *);
+static void	bstp_received_stp(struct bstp_state *, struct bstp_port *,
+		    struct mbuf **, struct bstp_tbpdu *);
+static void	bstp_received_rstp(struct bstp_state *, struct bstp_port *,
+		    struct mbuf **, struct bstp_tbpdu *);
+static void	bstp_received_tcn(struct bstp_state *, struct bstp_port *,
+		    struct bstp_tcn_unit *);
+static void	bstp_received_bpdu(struct bstp_state *, struct bstp_port *,
+		    struct bstp_config_unit *);
+static int	bstp_pdu_rcvtype(struct bstp_port *, struct bstp_config_unit *);
+static int	bstp_pdu_bettersame(struct bstp_port *, int);
+static int	bstp_info_cmp(struct bstp_pri_vector *,
+		    struct bstp_pri_vector *);
+static int	bstp_info_superior(struct bstp_pri_vector *,
+		    struct bstp_pri_vector *);
+static void	bstp_assign_roles(struct bstp_state *);
+static void	bstp_update_roles(struct bstp_state *, struct bstp_port *);
+static void	bstp_update_state(struct bstp_state *, struct bstp_port *);
+static void	bstp_update_tc(struct bstp_port *);
+static void	bstp_update_info(struct bstp_port *);
+static void	bstp_set_other_tcprop(struct bstp_port *);
+static void	bstp_set_all_reroot(struct bstp_state *);
+static void	bstp_set_all_sync(struct bstp_state *);
+static void	bstp_set_port_state(struct bstp_port *, int);
+static void	bstp_set_port_role(struct bstp_port *, int);
+static void	bstp_set_port_proto(struct bstp_port *, int);
+static void	bstp_set_port_tc(struct bstp_port *, int);
+static void	bstp_set_timer_tc(struct bstp_port *);
+static void	bstp_set_timer_msgage(struct bstp_port *);
+static int	bstp_rerooted(struct bstp_state *, struct bstp_port *);
+static uint32_t	bstp_calc_path_cost(struct bstp_port *);
+static void	bstp_notify_state(void *, int);
+static void	bstp_notify_rtage(void *, int);
+static void	bstp_ifupdstatus(struct bstp_state *, struct bstp_port *);
+static void	bstp_enable_port(struct bstp_state *, struct bstp_port *);
+static void	bstp_disable_port(struct bstp_state *, struct bstp_port *);
 static void	bstp_tick(void *);
-static void	bstp_timer_start(struct bridge_timer *, uint16_t);
-static void	bstp_timer_stop(struct bridge_timer *);
-static int	bstp_timer_expired(struct bridge_timer *, uint16_t);
-
-static void	bstp_hold_timer_expiry(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_message_age_timer_expiry(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_forward_delay_timer_expiry(struct bridge_softc *,
-		    struct bridge_iflist *);
-static void	bstp_topology_change_timer_expiry(struct bridge_softc *);
-static void	bstp_tcn_timer_expiry(struct bridge_softc *);
-static void	bstp_hello_timer_expiry(struct bridge_softc *);
+static void	bstp_timer_start(struct bstp_timer *, uint16_t);
+static void	bstp_timer_stop(struct bstp_timer *);
+static void	bstp_timer_latch(struct bstp_timer *);
+static int	bstp_timer_expired(struct bstp_timer *);
+static void	bstp_hello_timer_expiry(struct bstp_state *,
+		    struct bstp_port *);
+static void	bstp_message_age_expiry(struct bstp_state *,
+		    struct bstp_port *);
+static void	bstp_migrate_delay_expiry(struct bstp_state *,
+		    struct bstp_port *);
+static void	bstp_edge_delay_expiry(struct bstp_state *,
+		    struct bstp_port *);
 static int	bstp_addr_cmp(const uint8_t *, const uint8_t *);
+static int	bstp_same_bridgeid(uint64_t, uint64_t);
+static void	bstp_reinit(struct bstp_state *);
 
 static void
-bstp_transmit_config(struct bridge_softc *sc, struct bridge_iflist *bif)
+bstp_transmit(struct bstp_state *bs, struct bstp_port *bp)
 {
-	if (bif->bif_hold_timer.active) {
-		bif->bif_config_pending = 1;
+	if (bs->bs_running == 0)
 		return;
-	}
 
-	bif->bif_config_bpdu.cu_message_type = BSTP_MSGTYPE_CFG;
-	bif->bif_config_bpdu.cu_rootid = sc->sc_designated_root;
-	bif->bif_config_bpdu.cu_root_path_cost = sc->sc_root_path_cost;
-	bif->bif_config_bpdu.cu_bridge_id = sc->sc_bridge_id;
-	bif->bif_config_bpdu.cu_port_id = bif->bif_port_id;
-
-	if (bstp_root_bridge(sc))
-		bif->bif_config_bpdu.cu_message_age = 0;
-	else
-		bif->bif_config_bpdu.cu_message_age =
-		    sc->sc_root_port->bif_message_age_timer.value +
-		    BSTP_MESSAGE_AGE_INCR;
-
-	bif->bif_config_bpdu.cu_max_age = sc->sc_max_age;
-	bif->bif_config_bpdu.cu_hello_time = sc->sc_hello_time;
-	bif->bif_config_bpdu.cu_forward_delay = sc->sc_forward_delay;
-	bif->bif_config_bpdu.cu_topology_change_acknowledgment
-	    = bif->bif_topology_change_acknowledge;
-	bif->bif_config_bpdu.cu_topology_change = sc->sc_topology_change;
+	/*
+	 * A PDU can only be sent if we have tx quota left and the
+	 * hello timer is running.
+	 */
+	if (bp->bp_hello_timer.active == 0) {
+		/* Test if it needs to be reset */
+		bstp_hello_timer_expiry(bs, bp);
+		return;
+	}
+	if (bp->bp_txcount > bs->bs_txholdcount)
+		/* Ran out of karma */
+		return;
 
-	if (bif->bif_config_bpdu.cu_message_age < sc->sc_max_age) {
-		bif->bif_topology_change_acknowledge = 0;
-		bif->bif_config_pending = 0;
-		bstp_send_config_bpdu(sc, bif, &bif->bif_config_bpdu);
-		bstp_timer_start(&bif->bif_hold_timer, 0);
+	if (bp->bp_protover == BSTP_PROTO_RSTP) {
+		bstp_transmit_bpdu(bs, bp);
+		bp->bp_tc_ack = 0;
+	} else { /* STP */
+		switch (bp->bp_role) {
+			case BSTP_ROLE_DESIGNATED:
+				bstp_transmit_bpdu(bs, bp);
+				bp->bp_tc_ack = 0;
+				break;
+
+			case BSTP_ROLE_ROOT:
+				bstp_transmit_tcn(bs, bp);
+				break;
+		}
 	}
+	bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime);
+	bp->bp_flags &= ~BSTP_PORT_NEWINFO;
 }
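The quota test above is the TxHoldCount throttle from 802.1D-2004: bp_txcount goes up for every BPDU put on the wire and decays in the periodic tick handler (bstp_tick), so a port cannot flood BPDUs during a topology storm. A toy model of that behaviour; the hold count of 6 and the once-per-second decay are assumed defaults, not taken from this patch:

#include <stdio.h>

int
main(void)
{
	int txholdcount = 6;	/* assumed default hold count */
	int txcount = 0, sent = 0, sec, attempt;

	for (sec = 0; sec < 3; sec++) {
		for (attempt = 0; attempt < 10; attempt++) {
			if (txcount > txholdcount)
				continue;	/* ran out of karma */
			txcount++;
			sent++;
		}
		if (txcount > 0)
			txcount--;		/* one-second decay */
	}
	printf("sent %d of 30 attempted BPDUs\n", sent);
	return (0);
}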
 
 static void
-bstp_send_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
-    struct bstp_config_unit *cu)
+bstp_transmit_bpdu(struct bstp_state *bs, struct bstp_port *bp)
 {
-	struct ifnet *ifp;
-	struct mbuf *m;
-	struct ether_header *eh;
 	struct bstp_cbpdu bpdu;
 
-	BRIDGE_LOCK_ASSERT(sc);
-
-	ifp = bif->bif_ifp;
-
-	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
-		return;
-
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
-	if (m == NULL)
-		return;
-
-	eh = mtod(m, struct ether_header *);
-
-	m->m_pkthdr.rcvif = ifp;
-	m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu);
-	m->m_len = m->m_pkthdr.len;
-
-	bpdu.cbu_ssap = bpdu.cbu_dsap = LLC_8021D_LSAP;
-	bpdu.cbu_ctl = LLC_UI;
-	bpdu.cbu_protoid = htons(0);
-	bpdu.cbu_protover = 0;
-	bpdu.cbu_bpdutype = cu->cu_message_type;
-	bpdu.cbu_flags = (cu->cu_topology_change ? BSTP_FLAG_TC : 0) |
-	    (cu->cu_topology_change_acknowledgment ? BSTP_FLAG_TCA : 0);
-
-	bpdu.cbu_rootpri = htons(cu->cu_rootid >> 48);
-	bpdu.cbu_rootaddr[0] = cu->cu_rootid >> 40;
-	bpdu.cbu_rootaddr[1] = cu->cu_rootid >> 32;
-	bpdu.cbu_rootaddr[2] = cu->cu_rootid >> 24;
-	bpdu.cbu_rootaddr[3] = cu->cu_rootid >> 16;
-	bpdu.cbu_rootaddr[4] = cu->cu_rootid >> 8;
-	bpdu.cbu_rootaddr[5] = cu->cu_rootid >> 0;
-
-	bpdu.cbu_rootpathcost = htonl(cu->cu_root_path_cost);
-
-	bpdu.cbu_bridgepri = htons(cu->cu_bridge_id >> 48);
-	bpdu.cbu_bridgeaddr[0] = cu->cu_bridge_id >> 40;
-	bpdu.cbu_bridgeaddr[1] = cu->cu_bridge_id >> 32;
-	bpdu.cbu_bridgeaddr[2] = cu->cu_bridge_id >> 24;
-	bpdu.cbu_bridgeaddr[3] = cu->cu_bridge_id >> 16;
-	bpdu.cbu_bridgeaddr[4] = cu->cu_bridge_id >> 8;
-	bpdu.cbu_bridgeaddr[5] = cu->cu_bridge_id >> 0;
-
-	bpdu.cbu_portid = htons(cu->cu_port_id);
-	bpdu.cbu_messageage = htons(cu->cu_message_age);
-	bpdu.cbu_maxage = htons(cu->cu_max_age);
-	bpdu.cbu_hellotime = htons(cu->cu_hello_time);
-	bpdu.cbu_forwarddelay = htons(cu->cu_forward_delay);
-
-	memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
-	memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
-	eh->ether_type = htons(sizeof(bpdu));
-
-	memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
-
-	/* XXX: safe here?!? */
-	BRIDGE_UNLOCK(sc);
-	bridge_enqueue(sc, ifp, m);
-	BRIDGE_LOCK(sc);
-}
-
-static int
-bstp_root_bridge(struct bridge_softc *sc)
-{
-	return (sc->sc_designated_root == sc->sc_bridge_id);
-}
-
-static int
-bstp_supersedes_port_info(struct bridge_softc *sc, struct bridge_iflist *bif,
-    struct bstp_config_unit *cu)
-{
-	if (cu->cu_rootid < bif->bif_designated_root)
-		return (1);
-	if (cu->cu_rootid > bif->bif_designated_root)
-		return (0);
+	BSTP_LOCK_ASSERT(bs);
 
-	if (cu->cu_root_path_cost < bif->bif_designated_cost)
-		return (1);
-	if (cu->cu_root_path_cost > bif->bif_designated_cost)
-		return (0);
+	bpdu.cbu_rootpri = htons(bp->bp_desg_pv.pv_root_id >> 48);
+	PV2ADDR(bp->bp_desg_pv.pv_root_id, bpdu.cbu_rootaddr);
 
-	if (cu->cu_bridge_id < bif->bif_designated_bridge)
-		return (1);
-	if (cu->cu_bridge_id > bif->bif_designated_bridge)
-		return (0);
+	bpdu.cbu_rootpathcost = htonl(bp->bp_desg_pv.pv_cost);
 
-	if (sc->sc_bridge_id != cu->cu_bridge_id)
-		return (1);
-	if (cu->cu_port_id <= bif->bif_designated_port)
-		return (1);
-	return (0);
-}
+	bpdu.cbu_bridgepri = htons(bp->bp_desg_pv.pv_dbridge_id >> 48);
+	PV2ADDR(bp->bp_desg_pv.pv_dbridge_id, bpdu.cbu_bridgeaddr);
 
-static void
-bstp_record_config_information(struct bridge_softc *sc,
-    struct bridge_iflist *bif, struct bstp_config_unit *cu)
-{
-	bif->bif_designated_root = cu->cu_rootid;
-	bif->bif_designated_cost = cu->cu_root_path_cost;
-	bif->bif_designated_bridge = cu->cu_bridge_id;
-	bif->bif_designated_port = cu->cu_port_id;
-	bstp_timer_start(&bif->bif_message_age_timer, cu->cu_message_age);
-}
+	bpdu.cbu_portid = htons(bp->bp_port_id);
+	bpdu.cbu_messageage = htons(bp->bp_desg_msg_age);
+	bpdu.cbu_maxage = htons(bp->bp_desg_max_age);
+	bpdu.cbu_hellotime = htons(bp->bp_desg_htime);
+	bpdu.cbu_forwarddelay = htons(bp->bp_desg_fdelay);
 
-static void
-bstp_record_config_timeout_values(struct bridge_softc *sc,
-    struct bstp_config_unit *config)
-{
-	sc->sc_max_age = config->cu_max_age;
-	sc->sc_hello_time = config->cu_hello_time;
-	sc->sc_forward_delay = config->cu_forward_delay;
-	sc->sc_topology_change = config->cu_topology_change;
-}
+	bpdu.cbu_flags = bstp_pdu_flags(bp);
 
-static void
-bstp_config_bpdu_generation(struct bridge_softc *sc)
-{
-	struct bridge_iflist *bif;
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_STP:
+			bpdu.cbu_bpdutype = BSTP_MSGTYPE_CFG;
+			break;
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bstp_designated_port(sc, bif) &&
-		    (bif->bif_state != BSTP_IFSTATE_DISABLED))
-			bstp_transmit_config(sc, bif);
+		case BSTP_PROTO_RSTP:
+			bpdu.cbu_bpdutype = BSTP_MSGTYPE_RSTP;
+			break;
 	}
-}
 
-static int
-bstp_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
-{
-	return ((bif->bif_designated_bridge == sc->sc_bridge_id)
-	    && (bif->bif_designated_port == bif->bif_port_id));
+	bstp_send_bpdu(bs, bp, &bpdu);
 }
 
 static void
-bstp_transmit_tcn(struct bridge_softc *sc)
+bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp)
 {
 	struct bstp_tbpdu bpdu;
-	struct bridge_iflist *bif = sc->sc_root_port;
-	struct ifnet *ifp = bif->bif_ifp;
+	struct ifnet *ifp = bp->bp_ifp;
 	struct ether_header *eh;
 	struct mbuf *m;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	KASSERT(bp == bs->bs_root_port, ("%s: bad root port\n", __func__));
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
@@ -407,231 +256,223 @@
 
 	memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
 
-	/* XXX: safe here?!? */
-	BRIDGE_UNLOCK(sc);
-	bridge_enqueue(sc, ifp, m);
-	BRIDGE_LOCK(sc);
+	bp->bp_txcount++;
+	bstp_enqueue(ifp, m);
 }
 
 static void
-bstp_configuration_update(struct bridge_softc *sc)
+bstp_decode_bpdu(struct bstp_port *bp, struct bstp_cbpdu *cpdu,
+    struct bstp_config_unit *cu)
 {
-	BRIDGE_LOCK_ASSERT(sc);
+	int flags;
+
+	cu->cu_pv.pv_root_id =
+	    (((uint64_t)ntohs(cpdu->cbu_rootpri)) << 48) |
+	    (((uint64_t)cpdu->cbu_rootaddr[0]) << 40) |
+	    (((uint64_t)cpdu->cbu_rootaddr[1]) << 32) |
+	    (((uint64_t)cpdu->cbu_rootaddr[2]) << 24) |
+	    (((uint64_t)cpdu->cbu_rootaddr[3]) << 16) |
+	    (((uint64_t)cpdu->cbu_rootaddr[4]) << 8) |
+	    (((uint64_t)cpdu->cbu_rootaddr[5]) << 0);
+
+	cu->cu_pv.pv_dbridge_id =
+	    (((uint64_t)ntohs(cpdu->cbu_bridgepri)) << 48) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[0]) << 40) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[1]) << 32) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[2]) << 24) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[3]) << 16) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[4]) << 8) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[5]) << 0);
+
+	cu->cu_pv.pv_cost = ntohl(cpdu->cbu_rootpathcost);
+	cu->cu_message_age = ntohs(cpdu->cbu_messageage);
+	cu->cu_max_age = ntohs(cpdu->cbu_maxage);
+	cu->cu_hello_time = ntohs(cpdu->cbu_hellotime);
+	cu->cu_forward_delay = ntohs(cpdu->cbu_forwarddelay);
+	cu->cu_pv.pv_dport_id = ntohs(cpdu->cbu_portid);
+	cu->cu_pv.pv_port_id = bp->bp_port_id;
+	cu->cu_message_type = cpdu->cbu_bpdutype;
+
+	/* Strip off unused flags in STP mode */
+	flags = cpdu->cbu_flags;
+	switch (cpdu->cbu_protover) {
+		case BSTP_PROTO_STP:
+			flags &= BSTP_PDU_STPMASK;
+			/* A STP BPDU explicitly conveys a Designated Port */
+			cu->cu_role = BSTP_ROLE_DESIGNATED;
+			break;
+
+		case BSTP_PROTO_RSTP:
+			flags &= BSTP_PDU_RSTPMASK;
+			break;
+	}
 
-	bstp_root_selection(sc);
-	bstp_designated_port_selection(sc);
+	cu->cu_topology_change_ack =
+		(flags & BSTP_PDU_F_TCA) ? 1 : 0;
+	cu->cu_proposal =
+		(flags & BSTP_PDU_F_P) ? 1 : 0;
+	cu->cu_agree =
+		(flags & BSTP_PDU_F_A) ? 1 : 0;
+	cu->cu_learning =
+		(flags & BSTP_PDU_F_L) ? 1 : 0;
+	cu->cu_forwarding =
+		(flags & BSTP_PDU_F_F) ? 1 : 0;
+	cu->cu_topology_change =
+		(flags & BSTP_PDU_F_TC) ? 1 : 0;
+
+	switch ((flags & BSTP_PDU_PRMASK) >> BSTP_PDU_PRSHIFT) {
+		case BSTP_PDU_F_ROOT:
+			cu->cu_role = BSTP_ROLE_ROOT;
+			break;
+		case BSTP_PDU_F_ALT:
+			cu->cu_role = BSTP_ROLE_ALTERNATE;
+			break;
+		case BSTP_PDU_F_DESG:
+			cu->cu_role = BSTP_ROLE_DESIGNATED;
+			break;
+	}
 }
 
 static void
-bstp_root_selection(struct bridge_softc *sc)
+bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp,
+    struct bstp_cbpdu *bpdu)
 {
-	struct bridge_iflist *root_port = NULL, *bif;
+	struct ifnet *ifp;
+	struct mbuf *m;
+	struct ether_header *eh;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	BSTP_LOCK_ASSERT(bs);
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bstp_designated_port(sc, bif))
-			continue;
-		if (bif->bif_state == BSTP_IFSTATE_DISABLED)
-			continue;
-		if (bif->bif_designated_root >= sc->sc_bridge_id)
-			continue;
-		if (root_port == NULL)
-			goto set_port;
+	ifp = bp->bp_ifp;
 
-		if (bif->bif_designated_root < root_port->bif_designated_root)
-			goto set_port;
-		if (bif->bif_designated_root > root_port->bif_designated_root)
-			continue;
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return;
 
-		if ((bif->bif_designated_cost + bif->bif_path_cost) <
-		    (root_port->bif_designated_cost + root_port->bif_path_cost))
-			goto set_port;
-		if ((bif->bif_designated_cost + bif->bif_path_cost) >
-		    (root_port->bif_designated_cost + root_port->bif_path_cost))
-			continue;
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return;
 
-		if (bif->bif_designated_bridge <
-		    root_port->bif_designated_bridge)
-			goto set_port;
-		if (bif->bif_designated_bridge >
-		    root_port->bif_designated_bridge)
-			continue;
+	eh = mtod(m, struct ether_header *);
 
-		if (bif->bif_designated_port < root_port->bif_designated_port)
-			goto set_port;
-		if (bif->bif_designated_port > root_port->bif_designated_port)
-			continue;
+	bpdu->cbu_ssap = bpdu->cbu_dsap = LLC_8021D_LSAP;
+	bpdu->cbu_ctl = LLC_UI;
+	bpdu->cbu_protoid = htons(BSTP_PROTO_ID);
 
-		if (bif->bif_port_id >= root_port->bif_port_id)
-			continue;
-set_port:
-		root_port = bif;
-	}
+	memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+	memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
 
-	sc->sc_root_port = root_port;
-	if (root_port == NULL) {
-		sc->sc_designated_root = sc->sc_bridge_id;
-		sc->sc_root_path_cost = 0;
-	} else {
-		sc->sc_designated_root = root_port->bif_designated_root;
-		sc->sc_root_path_cost = root_port->bif_designated_cost +
-		    root_port->bif_path_cost;
+	switch (bpdu->cbu_bpdutype) {
+		case BSTP_MSGTYPE_CFG:
+			bpdu->cbu_protover = BSTP_PROTO_STP;
+			m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_STP_LEN;
+			eh->ether_type = htons(BSTP_BPDU_STP_LEN);
+			memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu,
+			    BSTP_BPDU_STP_LEN);
+			break;
+
+		case BSTP_MSGTYPE_RSTP:
+			bpdu->cbu_protover = BSTP_PROTO_RSTP;
+			bpdu->cbu_versionlen = htons(0);
+			m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_RSTP_LEN;
+			eh->ether_type = htons(BSTP_BPDU_RSTP_LEN);
+			memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu,
+			    BSTP_BPDU_RSTP_LEN);
+			break;
+
+		default:
+			panic("not implemented");
 	}
+	m->m_pkthdr.rcvif = ifp;
+	m->m_len = m->m_pkthdr.len;
+
+	bp->bp_txcount++;
+	bstp_enqueue(ifp, m);
 }
 
 static void
-bstp_designated_port_selection(struct bridge_softc *sc)
+bstp_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
 {
-	struct bridge_iflist *bif;
-
-	BRIDGE_LOCK_ASSERT(sc);
-
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bstp_designated_port(sc, bif))
-			goto designated;
-		if (bif->bif_designated_root != sc->sc_designated_root)
-			goto designated;
-
-		if (sc->sc_root_path_cost < bif->bif_designated_cost)
-			goto designated;
-		if (sc->sc_root_path_cost > bif->bif_designated_cost)
-			continue;
+	int err = 0;
 
-		if (sc->sc_bridge_id < bif->bif_designated_bridge)
-			goto designated;
-		if (sc->sc_bridge_id > bif->bif_designated_bridge)
-			continue;
+	IFQ_ENQUEUE(&dst_ifp->if_snd, m, err);
 
-		if (bif->bif_port_id > bif->bif_designated_port)
-			continue;
-designated:
-		bstp_become_designated_port(sc, bif);
-	}
+	if ((dst_ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
+		(*dst_ifp->if_start)(dst_ifp);
 }
 
-static void
-bstp_become_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+static int
+bstp_pdu_flags(struct bstp_port *bp)
 {
-	bif->bif_designated_root = sc->sc_designated_root;
-	bif->bif_designated_cost = sc->sc_root_path_cost;
-	bif->bif_designated_bridge = sc->sc_bridge_id;
-	bif->bif_designated_port = bif->bif_port_id;
-}
+	int flags = 0;
 
-static void
-bstp_port_state_selection(struct bridge_softc *sc)
-{
-	struct bridge_iflist *bif;
+	if (bp->bp_proposing && bp->bp_state != BSTP_IFSTATE_FORWARDING)
+		flags |= BSTP_PDU_F_P;
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bif == sc->sc_root_port) {
-			bif->bif_config_pending = 0;
-			bif->bif_topology_change_acknowledge = 0;
-			bstp_make_forwarding(sc, bif);
-		} else if (bstp_designated_port(sc, bif)) {
-			bstp_timer_stop(&bif->bif_message_age_timer);
-			bstp_make_forwarding(sc, bif);
-		} else {
-			bif->bif_config_pending = 0;
-			bif->bif_topology_change_acknowledge = 0;
-			bstp_make_blocking(sc, bif);
-		}
-	}
-}
+	if (bp->bp_agree)
+		flags |= BSTP_PDU_F_A;
 
-static void
-bstp_make_forwarding(struct bridge_softc *sc, struct bridge_iflist *bif)
-{
-	if (bif->bif_state == BSTP_IFSTATE_BLOCKING) {
-		bstp_set_port_state(bif, BSTP_IFSTATE_LISTENING);
-		bstp_timer_start(&bif->bif_forward_delay_timer, 0);
-	}
-}
+	if (bp->bp_tc_timer.active)
+		flags |= BSTP_PDU_F_TC;
 
-static void
-bstp_make_blocking(struct bridge_softc *sc, struct bridge_iflist *bif)
-{
-	BRIDGE_LOCK_ASSERT(sc);
+	if (bp->bp_tc_ack)
+		flags |= BSTP_PDU_F_TCA;
 
-	if ((bif->bif_state != BSTP_IFSTATE_DISABLED) &&
-	    (bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
-		if ((bif->bif_state == BSTP_IFSTATE_FORWARDING) ||
-		    (bif->bif_state == BSTP_IFSTATE_LEARNING)) {
-			if (bif->bif_change_detection_enabled) {
-				bstp_topology_change_detection(sc);
-			}
-		}
-		bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING);
-		bridge_rtdelete(sc, bif->bif_ifp, IFBF_FLUSHDYN);
-		bstp_timer_stop(&bif->bif_forward_delay_timer);
+	switch (bp->bp_state) {
+		case BSTP_IFSTATE_LEARNING:
+			flags |= BSTP_PDU_F_L;
+			break;
+
+		case BSTP_IFSTATE_FORWARDING:
+			flags |= (BSTP_PDU_F_L | BSTP_PDU_F_F);
+			break;
 	}
-}
 
-static void
-bstp_set_port_state(struct bridge_iflist *bif, uint8_t state)
-{
-	bif->bif_state = state;
-}
+	switch (bp->bp_role) {
+		case BSTP_ROLE_ROOT:
+			flags |=
+				(BSTP_PDU_F_ROOT << BSTP_PDU_PRSHIFT);
+			break;
 
-static void
-bstp_topology_change_detection(struct bridge_softc *sc)
-{
-	if (bstp_root_bridge(sc)) {
-		sc->sc_topology_change = 1;
-		bstp_timer_start(&sc->sc_topology_change_timer, 0);
-	} else if (!sc->sc_topology_change_detected) {
-		bstp_transmit_tcn(sc);
-		bstp_timer_start(&sc->sc_tcn_timer, 0);
-	}
-	sc->sc_topology_change_detected = 1;
-}
+		case BSTP_ROLE_ALTERNATE:
+		case BSTP_ROLE_BACKUP:	/* fall through */
+			flags |=
+				(BSTP_PDU_F_ALT << BSTP_PDU_PRSHIFT);
+			break;
 
-static void
-bstp_topology_change_acknowledged(struct bridge_softc *sc)
-{
-	sc->sc_topology_change_detected = 0;
-	bstp_timer_stop(&sc->sc_tcn_timer);
-}
+		case BSTP_ROLE_DESIGNATED:
+			flags |=
+				(BSTP_PDU_F_DESG << BSTP_PDU_PRSHIFT);
+			break;
+	}
 
-static void
-bstp_acknowledge_topology_change(struct bridge_softc *sc,
-    struct bridge_iflist *bif)
-{
-	bif->bif_topology_change_acknowledge = 1;
-	bstp_transmit_config(sc, bif);
+	/* Strip off unused flags in either mode */
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_STP:
+			flags &= BSTP_PDU_STPMASK;
+			break;
+		case BSTP_PROTO_RSTP:
+			flags &= BSTP_PDU_RSTPMASK;
+			break;
+	}
+	return (flags);
 }
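bstp_pdu_flags() assembles the single flags octet carried in the BPDU: topology change, proposal, a two-bit port role, learning, forwarding, agreement and TC-ack, with plain STP stripped down to just the TC and TC-ack bits. A userland sketch using the standard 802.1D-2004 bit positions; the local defines are assumed to mirror the BSTP_PDU_* constants in bridgestp.h:

#include <assert.h>
#include <stdio.h>

/* Standard RSTP flag layout; assumed to match the BSTP_PDU_* macros. */
#define	F_TC		0x01	/* topology change */
#define	F_PROPOSAL	0x02
#define	ROLE_SHIFT	2	/* two-bit port role in bits 2-3 */
#define	ROLE_MASK	0x0c
#define	F_LEARNING	0x10
#define	F_FORWARDING	0x20
#define	F_AGREE		0x40
#define	F_TCA		0x80	/* topology change ack */
#define	ROLE_DESIGNATED	0x03

int
main(void)
{
	/*
	 * A designated, forwarding port that has reached agreement and is
	 * still propagating a topology change:
	 */
	int flags = F_TC | F_LEARNING | F_FORWARDING | F_AGREE |
	    (ROLE_DESIGNATED << ROLE_SHIFT);

	assert(flags == 0x7d);
	assert(((flags & ROLE_MASK) >> ROLE_SHIFT) == ROLE_DESIGNATED);
	printf("flags octet = 0x%02x\n", flags);
	return (0);
}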
 
 struct mbuf *
-bstp_input(struct ifnet *ifp, struct mbuf *m)
+bstp_input(struct bstp_port *bp, struct ifnet *ifp, struct mbuf *m)
 {
-	struct bridge_softc *sc = ifp->if_bridge;
-	struct bridge_iflist *bif = NULL;
+	struct bstp_state *bs = bp->bp_bs;
 	struct ether_header *eh;
 	struct bstp_tbpdu tpdu;
-	struct bstp_cbpdu cpdu;
-	struct bstp_config_unit cu;
-	struct bstp_tcn_unit tu;
 	uint16_t len;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	if (bp->bp_active == 0) {
+		m_freem(m);
+		return (NULL);
+	}
 
-	eh = mtod(m, struct ether_header *);
+	BSTP_LOCK(bs);
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bif->bif_ifp == ifp)
-			break;
-	}
-	if (bif == NULL)
-		goto out;
+	eh = mtod(m, struct ether_header *);
 
 	len = ntohs(eh->ether_type);
 	if (len < sizeof(tpdu))
@@ -647,550 +488,1772 @@
 
 	memcpy(&tpdu, mtod(m, caddr_t), sizeof(tpdu));
 
+	/* basic packet checks */
 	if (tpdu.tbu_dsap != LLC_8021D_LSAP ||
 	    tpdu.tbu_ssap != LLC_8021D_LSAP ||
 	    tpdu.tbu_ctl != LLC_UI)
 		goto out;
-	if (tpdu.tbu_protoid != 0 || tpdu.tbu_protover != 0)
+	if (tpdu.tbu_protoid != BSTP_PROTO_ID)
 		goto out;
 
-	switch (tpdu.tbu_bpdutype) {
-	case BSTP_MSGTYPE_TCN:
-		tu.tu_message_type = tpdu.tbu_bpdutype;
-		bstp_received_tcn_bpdu(sc, bif, &tu);
-		break;
-	case BSTP_MSGTYPE_CFG:
-		if (m->m_len < sizeof(cpdu) &&
-		    (m = m_pullup(m, sizeof(cpdu))) == NULL)
+	/*
+	 * We can treat later versions of the PDU as the same as the maximum
+	 * version we implement. All additional parameters/flags are ignored.
+	 */
+	if (tpdu.tbu_protover > BSTP_PROTO_MAX)
+		tpdu.tbu_protover = BSTP_PROTO_MAX;
+
+	if (tpdu.tbu_protover != bp->bp_protover) {
+		/*
+		 * Wait for the migration delay timer to expire before changing
+		 * protocol version to avoid flip-flops.
+		 */
+		if (bp->bp_flags & BSTP_PORT_CANMIGRATE)
+			bstp_set_port_proto(bp, tpdu.tbu_protover);
+		else
 			goto out;
-		memcpy(&cpdu, mtod(m, caddr_t), sizeof(cpdu));
-
-		cu.cu_rootid =
-		    (((uint64_t)ntohs(cpdu.cbu_rootpri)) << 48) |
-		    (((uint64_t)cpdu.cbu_rootaddr[0]) << 40) |
-		    (((uint64_t)cpdu.cbu_rootaddr[1]) << 32) |
-		    (((uint64_t)cpdu.cbu_rootaddr[2]) << 24) |
-		    (((uint64_t)cpdu.cbu_rootaddr[3]) << 16) |
-		    (((uint64_t)cpdu.cbu_rootaddr[4]) << 8) |
-		    (((uint64_t)cpdu.cbu_rootaddr[5]) << 0);
-
-		cu.cu_bridge_id =
-		    (((uint64_t)ntohs(cpdu.cbu_bridgepri)) << 48) |
-		    (((uint64_t)cpdu.cbu_bridgeaddr[0]) << 40) |
-		    (((uint64_t)cpdu.cbu_bridgeaddr[1]) << 32) |
-		    (((uint64_t)cpdu.cbu_bridgeaddr[2]) << 24) |
-		    (((uint64_t)cpdu.cbu_bridgeaddr[3]) << 16) |
-		    (((uint64_t)cpdu.cbu_bridgeaddr[4]) << 8) |
-		    (((uint64_t)cpdu.cbu_bridgeaddr[5]) << 0);
-
-		cu.cu_root_path_cost = ntohl(cpdu.cbu_rootpathcost);
-		cu.cu_message_age = ntohs(cpdu.cbu_messageage);
-		cu.cu_max_age = ntohs(cpdu.cbu_maxage);
-		cu.cu_hello_time = ntohs(cpdu.cbu_hellotime);
-		cu.cu_forward_delay = ntohs(cpdu.cbu_forwarddelay);
-		cu.cu_port_id = ntohs(cpdu.cbu_portid);
-		cu.cu_message_type = cpdu.cbu_bpdutype;
-		cu.cu_topology_change_acknowledgment =
-		    (cpdu.cbu_flags & BSTP_FLAG_TCA) ? 1 : 0;
-		cu.cu_topology_change =
-		    (cpdu.cbu_flags & BSTP_FLAG_TC) ? 1 : 0;
-		bstp_received_config_bpdu(sc, bif, &cu);
-		break;
-	default:
-		goto out;
 	}
 
+	/* Clear operedge upon receiving a PDU on the port */
+	bp->bp_operedge = 0;
+	bstp_timer_start(&bp->bp_edge_delay_timer,
+	    BSTP_DEFAULT_MIGRATE_DELAY);
+
+	switch (tpdu.tbu_protover) {
+		case BSTP_PROTO_STP:
+			bstp_received_stp(bs, bp, &m, &tpdu);
+			break;
+
+		case BSTP_PROTO_RSTP:
+			bstp_received_rstp(bs, bp, &m, &tpdu);
+			break;
+	}
 out:
+	BSTP_UNLOCK(bs);
 	if (m)
 		m_freem(m);
 	return (NULL);
 }
 
 static void
-bstp_received_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
-    struct bstp_config_unit *cu)
+bstp_received_stp(struct bstp_state *bs, struct bstp_port *bp,
+    struct mbuf **mp, struct bstp_tbpdu *tpdu)
 {
-	int root;
-
-	BRIDGE_LOCK_ASSERT(sc);
-
-	root = bstp_root_bridge(sc);
-
-	if (bif->bif_state != BSTP_IFSTATE_DISABLED) {
-		if (bstp_supersedes_port_info(sc, bif, cu)) {
-			bstp_record_config_information(sc, bif, cu);
-			bstp_configuration_update(sc);
-			bstp_port_state_selection(sc);
-
-			if ((bstp_root_bridge(sc) == 0) && root) {
-				bstp_timer_stop(&sc->sc_hello_timer);
-
-				if (sc->sc_topology_change_detected) {
-					bstp_timer_stop(
-					    &sc->sc_topology_change_timer);
-					bstp_transmit_tcn(sc);
-					bstp_timer_start(&sc->sc_tcn_timer, 0);
-				}
-			}
+	struct bstp_cbpdu cpdu;
+	struct bstp_config_unit *cu = &bp->bp_msg_cu;
+	struct bstp_tcn_unit tu;
 
-			if (bif == sc->sc_root_port) {
-				bstp_record_config_timeout_values(sc, cu);
-				bstp_config_bpdu_generation(sc);
+	switch (tpdu->tbu_bpdutype) {
+	case BSTP_MSGTYPE_TCN:
+		tu.tu_message_type = tpdu->tbu_bpdutype;
+		bstp_received_tcn(bs, bp, &tu);
+		break;
+	case BSTP_MSGTYPE_CFG:
+		if ((*mp)->m_len < BSTP_BPDU_STP_LEN &&
+		    (*mp = m_pullup(*mp, BSTP_BPDU_STP_LEN)) == NULL)
+			return;
+		memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_STP_LEN);
 
-				if (cu->cu_topology_change_acknowledgment)
-					bstp_topology_change_acknowledged(sc);
-			}
-		} else if (bstp_designated_port(sc, bif))
-			bstp_transmit_config(sc, bif);
+		bstp_decode_bpdu(bp, &cpdu, cu);
+		bstp_received_bpdu(bs, bp, cu);
+		break;
 	}
 }
 
 static void
-bstp_received_tcn_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
-    struct bstp_tcn_unit *tcn)
+bstp_received_rstp(struct bstp_state *bs, struct bstp_port *bp,
+    struct mbuf **mp, struct bstp_tbpdu *tpdu)
 {
-	if (bif->bif_state != BSTP_IFSTATE_DISABLED &&
-	    bstp_designated_port(sc, bif)) {
-		bstp_topology_change_detection(sc);
-		bstp_acknowledge_topology_change(sc, bif);
-	}
+	struct bstp_cbpdu cpdu;
+	struct bstp_config_unit *cu = &bp->bp_msg_cu;
+
+	if (tpdu->tbu_bpdutype != BSTP_MSGTYPE_RSTP)
+		return;
+
+	if ((*mp)->m_len < BSTP_BPDU_RSTP_LEN &&
+	    (*mp = m_pullup(*mp, BSTP_BPDU_RSTP_LEN)) == NULL)
+		return;
+	memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_RSTP_LEN);
+
+	bstp_decode_bpdu(bp, &cpdu, cu);
+	bstp_received_bpdu(bs, bp, cu);
 }
 
 static void
-bstp_hello_timer_expiry(struct bridge_softc *sc)
+bstp_received_tcn(struct bstp_state *bs, struct bstp_port *bp,
+    struct bstp_tcn_unit *tcn)
 {
-	bstp_config_bpdu_generation(sc);
-	bstp_timer_start(&sc->sc_hello_timer, 0);
+	bp->bp_rcvdtcn = 1;
+	bstp_update_tc(bp);
 }
 
 static void
-bstp_message_age_timer_expiry(struct bridge_softc *sc,
-    struct bridge_iflist *bif)
+bstp_received_bpdu(struct bstp_state *bs, struct bstp_port *bp,
+    struct bstp_config_unit *cu)
 {
-	int root;
+	int type;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	/* We need to have transitioned to INFO_MINE before proceeding */
+	switch (bp->bp_infois) {
+		case BSTP_INFO_DISABLED:
+		case BSTP_INFO_AGED:
+			return;
+	}
+
+	type = bstp_pdu_rcvtype(bp, cu);
+
+	switch (type) {
+		case BSTP_PDU_SUPERIOR:
+			bs->bs_allsynced = 0;
+			bp->bp_agreed = 0;
+			bp->bp_proposing = 0;
+
+			if (cu->cu_proposal && cu->cu_forwarding == 0)
+				bp->bp_proposed = 1;
+			if (cu->cu_topology_change)
+				bp->bp_rcvdtc = 1;
+			if (cu->cu_topology_change_ack)
+				bp->bp_rcvdtca = 1;
+
+			if (bp->bp_agree &&
+			    !bstp_pdu_bettersame(bp, BSTP_INFO_RECIEVED))
+				bp->bp_agree = 0;
+
+			/* copy the received priority and timers to the port */
+			bp->bp_port_pv = cu->cu_pv;
+			bp->bp_port_msg_age = cu->cu_message_age;
+			bp->bp_port_max_age = cu->cu_max_age;
+			bp->bp_port_fdelay = cu->cu_forward_delay;
+			bp->bp_port_htime =
+				(cu->cu_hello_time > BSTP_MIN_HELLO_TIME ?
+				 cu->cu_hello_time : BSTP_MIN_HELLO_TIME);
 
-	root = bstp_root_bridge(sc);
-	bstp_become_designated_port(sc, bif);
-	bstp_configuration_update(sc);
-	bstp_port_state_selection(sc);
+			/* set expiry for the new info */
+			bstp_set_timer_msgage(bp);
 
-	if ((bstp_root_bridge(sc)) && (root == 0)) {
-		sc->sc_max_age = sc->sc_bridge_max_age;
-		sc->sc_hello_time = sc->sc_bridge_hello_time;
-		sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+			bp->bp_infois = BSTP_INFO_RECIEVED;
+			bstp_assign_roles(bs);
+			break;
+
+		case BSTP_PDU_REPEATED:
+			if (cu->cu_proposal && cu->cu_forwarding == 0)
+				bp->bp_proposed = 1;
+			if (cu->cu_topology_change)
+				bp->bp_rcvdtc = 1;
+			if (cu->cu_topology_change_ack)
+				bp->bp_rcvdtca = 1;
+
+			/* rearm the age timer */
+			bstp_set_timer_msgage(bp);
+			break;
+
+		case BSTP_PDU_INFERIOR:
+			if (cu->cu_learning) {
+				bp->bp_agreed = 1;
+				bp->bp_proposing = 0;
+			}
+			break;
+
+		case BSTP_PDU_INFERIORALT:
+			/*
+			 * Only point-to-point links are allowed to make fast
+			 * transitions to forwarding.
+			 */
+			if (cu->cu_agree && bp->bp_ptp_link) {
+				bp->bp_agreed = 1;
+				bp->bp_proposing = 0;
+			} else
+				bp->bp_agreed = 0;
+
+			if (cu->cu_topology_change)
+				bp->bp_rcvdtc = 1;
+			if (cu->cu_topology_change_ack)
+				bp->bp_rcvdtca = 1;
+			break;
 
-		bstp_topology_change_detection(sc);
-		bstp_timer_stop(&sc->sc_tcn_timer);
-		bstp_config_bpdu_generation(sc);
-		bstp_timer_start(&sc->sc_hello_timer, 0);
+		case BSTP_PDU_OTHER:
+			return;	/* do nothing */
 	}
+	/* update the state machines with the new data */
+	bstp_update_state(bs, bp);
 }
 
-static void
-bstp_forward_delay_timer_expiry(struct bridge_softc *sc,
-    struct bridge_iflist *bif)
+static int
+bstp_pdu_rcvtype(struct bstp_port *bp, struct bstp_config_unit *cu)
 {
-	if (bif->bif_state == BSTP_IFSTATE_LISTENING) {
-		bstp_set_port_state(bif, BSTP_IFSTATE_LEARNING);
-		bstp_timer_start(&bif->bif_forward_delay_timer, 0);
-	} else if (bif->bif_state == BSTP_IFSTATE_LEARNING) {
-		bstp_set_port_state(bif, BSTP_IFSTATE_FORWARDING);
-		if (bstp_designated_for_some_port(sc) &&
-		    bif->bif_change_detection_enabled)
-			bstp_topology_change_detection(sc);
+	int type;
+
+	/* default return type */
+	type = BSTP_PDU_OTHER;
+
+	switch (cu->cu_role) {
+	case BSTP_ROLE_DESIGNATED:
+		if (bstp_info_superior(&bp->bp_port_pv, &cu->cu_pv))
+			/* bpdu priority is superior */
+			type = BSTP_PDU_SUPERIOR;
+		else if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) ==
+		    INFO_SAME) {
+			if (bp->bp_port_msg_age != cu->cu_message_age ||
+			    bp->bp_port_max_age != cu->cu_max_age ||
+			    bp->bp_port_fdelay != cu->cu_forward_delay ||
+			    bp->bp_port_htime != cu->cu_hello_time)
+				/* bpdu priority is equal and timers differ */
+				type = BSTP_PDU_SUPERIOR;
+			else
+				/* bpdu is equal */
+				type = BSTP_PDU_REPEATED;
+		} else
+			/* bpdu priority is worse */
+			type = BSTP_PDU_INFERIOR;
+
+		break;
+
+	case BSTP_ROLE_ROOT:
+	case BSTP_ROLE_ALTERNATE:
+	case BSTP_ROLE_BACKUP:
+		if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) <= INFO_SAME)
+			/*
+			 * not a designated port and priority is the same or
+			 * worse
+			 */
+			type = BSTP_PDU_INFERIORALT;
+		break;
 	}
+
+	return (type);
 }
 
 static int
-bstp_designated_for_some_port(struct bridge_softc *sc)
+bstp_pdu_bettersame(struct bstp_port *bp, int newinfo)
 {
+	if (newinfo == BSTP_INFO_RECIEVED &&
+	    bp->bp_infois == BSTP_INFO_RECIEVED &&
+	    bstp_info_cmp(&bp->bp_port_pv, &bp->bp_msg_cu.cu_pv) >= INFO_SAME)
+		return (1);
 
-	struct bridge_iflist *bif;
+	if (newinfo == BSTP_INFO_MINE &&
+	    bp->bp_infois == BSTP_INFO_MINE &&
+	    bstp_info_cmp(&bp->bp_port_pv, &bp->bp_desg_pv) >= INFO_SAME)
+		return (1);
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bif->bif_designated_bridge == sc->sc_bridge_id)
-			return (1);
-	}
 	return (0);
 }
 
-static void
-bstp_tcn_timer_expiry(struct bridge_softc *sc)
+static int
+bstp_info_cmp(struct bstp_pri_vector *pv,
+    struct bstp_pri_vector *cpv)
 {
-	bstp_transmit_tcn(sc);
-	bstp_timer_start(&sc->sc_tcn_timer, 0);
-}
+	if (cpv->pv_root_id < pv->pv_root_id)
+		return (INFO_BETTER);
+	if (cpv->pv_root_id > pv->pv_root_id)
+		return (INFO_WORSE);
+
+	if (cpv->pv_cost < pv->pv_cost)
+		return (INFO_BETTER);
+	if (cpv->pv_cost > pv->pv_cost)
+		return (INFO_WORSE);
+
+	if (cpv->pv_dbridge_id < pv->pv_dbridge_id)
+		return (INFO_BETTER);
+	if (cpv->pv_dbridge_id > pv->pv_dbridge_id)
+		return (INFO_WORSE);
+
+	if (cpv->pv_dport_id < pv->pv_dport_id)
+		return (INFO_BETTER);
+	if (cpv->pv_dport_id > pv->pv_dport_id)
+		return (INFO_WORSE);
 
-static void
-bstp_topology_change_timer_expiry(struct bridge_softc *sc)
-{
-	sc->sc_topology_change_detected = 0;
-	sc->sc_topology_change = 0;
+	return (INFO_SAME);
 }
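bstp_info_cmp() applies the 802.1D-2004 ordering to priority vectors: root bridge id first, then root path cost, then designated bridge id, then designated port id, with the numerically lower value always winning (INFO_BETTER/INFO_SAME/INFO_WORSE as defined above). The same comparison as a standalone sketch, with two hypothetical vectors:

#include <assert.h>
#include <stdint.h>

struct pv {			/* local copy of the fields compared above */
	uint64_t root_id;
	uint32_t cost;
	uint64_t dbridge_id;
	uint16_t dport_id;
};

/* Returns 1 if b beats a, -1 if it is worse, 0 if identical. */
static int
pv_cmp(const struct pv *a, const struct pv *b)
{
	if (b->root_id != a->root_id)
		return (b->root_id < a->root_id ? 1 : -1);
	if (b->cost != a->cost)
		return (b->cost < a->cost ? 1 : -1);
	if (b->dbridge_id != a->dbridge_id)
		return (b->dbridge_id < a->dbridge_id ? 1 : -1);
	if (b->dport_id != a->dport_id)
		return (b->dport_id < a->dport_id ? 1 : -1);
	return (0);
}

int
main(void)
{
	struct pv mine = { 0x8000001122334455ULL, 20000,
	    0x8000001122334455ULL, 0x8001 };
	struct pv heard = { 0x7000aabbccddeeffULL, 20000,
	    0x7000aabbccddeeffULL, 0x8001 };

	/* A BPDU advertising a lower (better) root id supersedes our info. */
	assert(pv_cmp(&mine, &heard) == 1);
	return (0);
}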
 
-static void
-bstp_hold_timer_expiry(struct bridge_softc *sc, struct bridge_iflist *bif)
+/*
+ * This message priority vector is superior to the port priority vector and
+ * will replace it if, and only if, the message priority vector is better than
+ * the port priority vector, or the message has been transmitted from the same
+ * designated bridge and designated port as the port priority vector.
+ */
+static int
+bstp_info_superior(struct bstp_pri_vector *pv,
+    struct bstp_pri_vector *cpv)
 {
-	if (bif->bif_config_pending)
-		bstp_transmit_config(sc, bif);
+	if (bstp_info_cmp(pv, cpv) == INFO_BETTER ||
+	    (bstp_same_bridgeid(pv->pv_dbridge_id, cpv->pv_dbridge_id) &&
+	    (cpv->pv_dport_id & 0xfff) == (pv->pv_dport_id & 0xfff)))
+		return (1);
+	return (0);
 }
 
-static int
-bstp_addr_cmp(const uint8_t *a, const uint8_t *b)
+static void
+bstp_assign_roles(struct bstp_state *bs)
 {
-	int i, d;
+	struct bstp_port *bp, *rbp = NULL;
+	struct bstp_pri_vector pv;
 
-	for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
-		d = ((int)a[i]) - ((int)b[i]);
+	/* default to our priority vector */
+	bs->bs_root_pv = bs->bs_bridge_pv;
+	bs->bs_root_msg_age = 0;
+	bs->bs_root_max_age = bs->bs_bridge_max_age;
+	bs->bs_root_fdelay = bs->bs_bridge_fdelay;
+	bs->bs_root_htime = bs->bs_bridge_htime;
+	bs->bs_root_port = NULL;
+
+	/* check if any received info supersedes us */
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		if (bp->bp_infois != BSTP_INFO_RECIEVED)
+			continue;
+
+		pv = bp->bp_port_pv;
+		pv.pv_cost += bp->bp_path_cost;
+
+		/*
+		 * The root priority vector is the best of the set comprising
+		 * the bridge priority vector plus all root path priority
+		 * vectors whose bridge address is not equal to us.
+		 */
+		if (bstp_same_bridgeid(pv.pv_dbridge_id,
+		    bs->bs_bridge_pv.pv_dbridge_id) == 0 &&
+		    bstp_info_cmp(&bs->bs_root_pv, &pv) == INFO_BETTER) {
+			/* the port vector replaces the root */
+			bs->bs_root_pv = pv;
+			bs->bs_root_msg_age = bp->bp_port_msg_age +
+			    BSTP_MESSAGE_AGE_INCR;
+			bs->bs_root_max_age = bp->bp_port_max_age;
+			bs->bs_root_fdelay = bp->bp_port_fdelay;
+			bs->bs_root_htime = bp->bp_port_htime;
+			rbp = bp;
+		}
 	}
 
-	return (d);
-}
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		/* calculate the port designated vector */
+		bp->bp_desg_pv.pv_root_id = bs->bs_root_pv.pv_root_id;
+		bp->bp_desg_pv.pv_cost = bs->bs_root_pv.pv_cost;
+		bp->bp_desg_pv.pv_dbridge_id = bs->bs_bridge_pv.pv_dbridge_id;
+		bp->bp_desg_pv.pv_dport_id = bp->bp_port_id;
+		bp->bp_desg_pv.pv_port_id = bp->bp_port_id;
+
+		/* calculate designated times */
+		bp->bp_desg_msg_age = bs->bs_root_msg_age;
+		bp->bp_desg_max_age = bs->bs_root_max_age;
+		bp->bp_desg_fdelay = bs->bs_root_fdelay;
+		bp->bp_desg_htime = bs->bs_bridge_htime;
+
+
+		switch (bp->bp_infois) {
+		case BSTP_INFO_DISABLED:
+			bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+			break;
 
-void
-bstp_initialization(struct bridge_softc *sc)
-{
-	struct bridge_iflist *bif, *mif;
-	u_char *e_addr;
+		case BSTP_INFO_AGED:
+			bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+			bstp_update_info(bp);
+			break;
 
-	BRIDGE_LOCK_ASSERT(sc);
+		case BSTP_INFO_MINE:
+			bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+			/* update the port info if stale */
+			if (bstp_info_cmp(&bp->bp_port_pv,
+			    &bp->bp_desg_pv) != INFO_SAME ||
+			    (rbp != NULL &&
+			    (bp->bp_port_msg_age != rbp->bp_port_msg_age ||
+			    bp->bp_port_max_age != rbp->bp_port_max_age ||
+			    bp->bp_port_fdelay != rbp->bp_port_fdelay ||
+			    bp->bp_port_htime != rbp->bp_port_htime)))
+				bstp_update_info(bp);
+			break;
 
-	mif = NULL;
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bif->bif_ifp->if_type != IFT_ETHER)
-			continue;
-		bif->bif_port_id = (bif->bif_priority << 8) |
-		    (bif->bif_ifp->if_index & 0xff);
+		case BSTP_INFO_RECIEVED:
+			if (bp == rbp) {
+				/*
+				 * root priority is derived from this
+				 * port, make it the root port.
+				 */
+				bstp_set_port_role(bp, BSTP_ROLE_ROOT);
+				bs->bs_root_port = bp;
+			} else if (bstp_info_cmp(&bp->bp_port_pv,
+				    &bp->bp_desg_pv) == INFO_BETTER) {
+				/*
+				 * the port priority is lower than the root
+				 * port.
+				 */
+				bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+				bstp_update_info(bp);
+			} else {
+				if (bstp_same_bridgeid(
+				    bp->bp_port_pv.pv_dbridge_id,
+				    bs->bs_bridge_pv.pv_dbridge_id)) {
+					/*
+					 * the designated bridge refers to
+					 * another port on this bridge.
+					 */
+					bstp_set_port_role(bp,
+					    BSTP_ROLE_BACKUP);
+				} else {
+					/*
+					 * the port is an inferior path to the
+					 * root bridge.
+					 */
+					bstp_set_port_role(bp,
+					    BSTP_ROLE_ALTERNATE);
+				}
+			}
+			break;
+		}
+	}
+}
 
-		if (mif == NULL) {
-			mif = bif;
-			continue;
+static void
+bstp_update_state(struct bstp_state *bs, struct bstp_port *bp)
+{
+	struct bstp_port *bp2;
+	int synced;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	/* check if all the ports have synchronised again */
+	if (!bs->bs_allsynced) {
+		synced = 1;
+		LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+			if (!(bp2->bp_synced ||
+			     bp2->bp_role == BSTP_ROLE_ROOT)) {
+				synced = 0;
+				break;
+			}
 		}
-		if (bstp_addr_cmp(IF_LLADDR(bif->bif_ifp),
-		    IF_LLADDR(mif->bif_ifp)) < 0) {
-			mif = bif;
-			continue;
+		bs->bs_allsynced = synced;
+	}
+
+	bstp_update_roles(bs, bp);
+	bstp_update_tc(bp);
+}
+
+static void
+bstp_update_roles(struct bstp_state *bs, struct bstp_port *bp)
+{
+	switch (bp->bp_role) {
+	case BSTP_ROLE_DISABLED:
+		/* Clear any flags if set */
+		if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) {
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+		}
+		break;
+
+	case BSTP_ROLE_ALTERNATE:
+	case BSTP_ROLE_BACKUP:
+		if ((bs->bs_allsynced && !bp->bp_agree) ||
+		    (bp->bp_proposed && bp->bp_agree)) {
+			bp->bp_proposed = 0;
+			bp->bp_agree = 1;
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			DPRINTF("%s -> ALTERNATE_AGREED\n",
+			    bp->bp_ifp->if_xname);
 		}
+
+		if (bp->bp_proposed && !bp->bp_agree) {
+			bstp_set_all_sync(bs);
+			bp->bp_proposed = 0;
+			DPRINTF("%s -> ALTERNATE_PROPOSED\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		/* Clear any flags if set */
+		if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) {
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+			DPRINTF("%s -> ALTERNATE_PORT\n", bp->bp_ifp->if_xname);
+		}
+		break;
+
+	case BSTP_ROLE_ROOT:
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING && !bp->bp_reroot) {
+			bstp_set_all_reroot(bs);
+			DPRINTF("%s -> ROOT_REROOT\n", bp->bp_ifp->if_xname);
+		}
+
+		if ((bs->bs_allsynced && !bp->bp_agree) ||
+		    (bp->bp_proposed && bp->bp_agree)) {
+			bp->bp_proposed = 0;
+			bp->bp_sync = 0;
+			bp->bp_agree = 1;
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			DPRINTF("%s -> ROOT_AGREED\n", bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_proposed && !bp->bp_agree) {
+			bstp_set_all_sync(bs);
+			bp->bp_proposed = 0;
+			DPRINTF("%s -> ROOT_PROPOSED\n", bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+		    (bp->bp_forward_delay_timer.active == 0 ||
+		    (bstp_rerooted(bs, bp) &&
+		    bp->bp_recent_backup_timer.active == 0 &&
+		    bp->bp_protover == BSTP_PROTO_RSTP))) {
+			switch (bp->bp_state) {
+			case BSTP_IFSTATE_DISCARDING:
+				bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
+				break;
+			case BSTP_IFSTATE_LEARNING:
+				bstp_set_port_state(bp,
+				    BSTP_IFSTATE_FORWARDING);
+				break;
+			}
+		}
+
+		if (bp->bp_state == BSTP_IFSTATE_FORWARDING && bp->bp_reroot) {
+			bp->bp_reroot = 0;
+			DPRINTF("%s -> ROOT_REROOTED\n", bp->bp_ifp->if_xname);
+		}
+		break;
+
+	case BSTP_ROLE_DESIGNATED:
+		if (bp->bp_recent_root_timer.active == 0 && bp->bp_reroot) {
+			bp->bp_reroot = 0;
+			DPRINTF("%s -> DESIGNATED_RETIRED\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		if ((bp->bp_state == BSTP_IFSTATE_DISCARDING &&
+		    !bp->bp_synced) || (bp->bp_agreed && !bp->bp_synced) ||
+		    (bp->bp_operedge && !bp->bp_synced) ||
+		    (bp->bp_sync && bp->bp_synced)) {
+			bstp_timer_stop(&bp->bp_recent_root_timer);
+			bp->bp_synced = 1;
+			bp->bp_sync = 0;
+			DPRINTF("%s -> DESIGNATED_SYNCED\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+		    !bp->bp_agreed && !bp->bp_proposing &&
+		    !bp->bp_operedge) {
+			bp->bp_proposing = 1;
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			bstp_timer_start(&bp->bp_edge_delay_timer,
+			    (bp->bp_ptp_link ? BSTP_DEFAULT_MIGRATE_DELAY :
+			     bp->bp_desg_max_age));
+			DPRINTF("%s -> DESIGNATED_PROPOSE\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+		    (bp->bp_forward_delay_timer.active == 0 || bp->bp_agreed ||
+		    bp->bp_operedge) &&
+		    (bp->bp_recent_root_timer.active == 0 || !bp->bp_reroot) &&
+		    !bp->bp_sync) {
+			if (bp->bp_agreed)
+				DPRINTF("%s -> AGREED\n", bp->bp_ifp->if_xname);
+			/*
+			 * If agreed|operedge then go straight to forwarding,
+			 * otherwise follow discard -> learn -> forward.
+			 */
+			if (bp->bp_agreed || bp->bp_operedge ||
+			    bp->bp_state == BSTP_IFSTATE_LEARNING) {
+				bstp_set_port_state(bp,
+				    BSTP_IFSTATE_FORWARDING);
+				bp->bp_agreed = bp->bp_protover;
+			} else if (bp->bp_state == BSTP_IFSTATE_DISCARDING)
+				bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
+		}
+
+		if (((bp->bp_sync && !bp->bp_synced) ||
+		    (bp->bp_reroot && bp->bp_recent_root_timer.active) ||
+		    (bp->bp_flags & BSTP_PORT_DISPUTED)) && !bp->bp_operedge &&
+		    bp->bp_state != BSTP_IFSTATE_DISCARDING) {
+			bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+			bp->bp_flags &= ~BSTP_PORT_DISPUTED;
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_protover == BSTP_PROTO_RSTP ?
+			    bp->bp_desg_htime : bp->bp_desg_fdelay);
+			DPRINTF("%s -> DESIGNATED_DISCARD\n",
+			    bp->bp_ifp->if_xname);
+		}
+		break;
 	}
-	if (mif == NULL) {
-		bstp_stop(sc);
-		return;
+
+	if (bp->bp_flags & BSTP_PORT_NEWINFO)
+		bstp_transmit(bs, bp);
+}
+
+static void
+bstp_update_tc(struct bstp_port *bp)
+{
+	switch (bp->bp_tcstate) {
+		case BSTP_TCSTATE_ACTIVE:
+			if ((bp->bp_role != BSTP_ROLE_DESIGNATED &&
+			    bp->bp_role != BSTP_ROLE_ROOT) || bp->bp_operedge)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+
+			if (bp->bp_rcvdtcn)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_TCN);
+			if (bp->bp_rcvdtc)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_TC);
+
+			if (bp->bp_tc_prop && !bp->bp_operedge)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_PROPAG);
+
+			if (bp->bp_rcvdtca)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_ACK);
+			break;
+
+		case BSTP_TCSTATE_INACTIVE:
+			if ((bp->bp_state == BSTP_IFSTATE_LEARNING ||
+			    bp->bp_state == BSTP_IFSTATE_FORWARDING) &&
+			    bp->bp_fdbflush == 0)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+			break;
+
+		case BSTP_TCSTATE_LEARNING:
+			if (bp->bp_rcvdtc || bp->bp_rcvdtcn || bp->bp_rcvdtca ||
+			    bp->bp_tc_prop)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+			else if (bp->bp_role != BSTP_ROLE_DESIGNATED &&
+				 bp->bp_role != BSTP_ROLE_ROOT &&
+				 bp->bp_state == BSTP_IFSTATE_DISCARDING)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+
+			if ((bp->bp_role == BSTP_ROLE_DESIGNATED ||
+			    bp->bp_role == BSTP_ROLE_ROOT) &&
+			    bp->bp_state == BSTP_IFSTATE_FORWARDING &&
+			    !bp->bp_operedge)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_DETECTED);
+			break;
+
+		/* these are transient states and go straight back to ACTIVE */
+		case BSTP_TCSTATE_DETECTED:
+		case BSTP_TCSTATE_TCN:
+		case BSTP_TCSTATE_TC:
+		case BSTP_TCSTATE_PROPAG:
+		case BSTP_TCSTATE_ACK:
+			DPRINTF("Invalid TC state for %s\n",
+			    bp->bp_ifp->if_xname);
+			break;
 	}
 
-	e_addr = IF_LLADDR(mif->bif_ifp);
-	sc->sc_bridge_id =
-	    (((uint64_t)sc->sc_bridge_priority) << 48) |
-	    (((uint64_t)e_addr[0]) << 40) |
-	    (((uint64_t)e_addr[1]) << 32) |
-	    (((uint64_t)e_addr[2]) << 24) |
-	    (((uint64_t)e_addr[3]) << 16) |
-	    (((uint64_t)e_addr[4]) << 8) |
-	    (((uint64_t)e_addr[5]));
+}
 
-	sc->sc_designated_root = sc->sc_bridge_id;
-	sc->sc_root_path_cost = 0;
-	sc->sc_root_port = NULL;
-
-	sc->sc_max_age = sc->sc_bridge_max_age;
-	sc->sc_hello_time = sc->sc_bridge_hello_time;
-	sc->sc_forward_delay = sc->sc_bridge_forward_delay;
-	sc->sc_topology_change_detected = 0;
-	sc->sc_topology_change = 0;
-	bstp_timer_stop(&sc->sc_tcn_timer);
-	bstp_timer_stop(&sc->sc_topology_change_timer);
-
-	if (callout_pending(&sc->sc_bstpcallout) == 0)
-		callout_reset(&sc->sc_bstpcallout, hz,
-		    bstp_tick, sc);
-
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if (bif->bif_flags & IFBIF_STP)
-			bstp_ifupdstatus(sc, bif);
-		else
-			bstp_disable_port(sc, bif);
+static void
+bstp_update_info(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	bp->bp_proposing = 0;
+	bp->bp_proposed = 0;
+
+	if (bp->bp_agreed && !bstp_pdu_bettersame(bp, BSTP_INFO_MINE))
+		bp->bp_agreed = 0;
+
+	if (bp->bp_synced && !bp->bp_agreed) {
+		bp->bp_synced = 0;
+		bs->bs_allsynced = 0;
 	}
 
-	bstp_port_state_selection(sc);
-	bstp_config_bpdu_generation(sc);
-	bstp_timer_start(&sc->sc_hello_timer, 0);
-	bstp_timer_start(&sc->sc_link_timer, 0);
+	/* copy the designated pv to the port */
+	bp->bp_port_pv = bp->bp_desg_pv;
+	bp->bp_port_msg_age = bp->bp_desg_msg_age;
+	bp->bp_port_max_age = bp->bp_desg_max_age;
+	bp->bp_port_fdelay = bp->bp_desg_fdelay;
+	bp->bp_port_htime = bp->bp_desg_htime;
+	bp->bp_infois = BSTP_INFO_MINE;
+
+	/* Set transmit flag but do not immediately send */
+	bp->bp_flags |= BSTP_PORT_NEWINFO;
 }
 
-void
-bstp_stop(struct bridge_softc *sc)
+/* set tcprop on every port other than the caller */
+static void
+bstp_set_other_tcprop(struct bstp_port *bp)
 {
-	struct bridge_iflist *bif;
+	struct bstp_state *bs = bp->bp_bs;
+	struct bstp_port *bp2;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	BSTP_LOCK_ASSERT(bs);
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
-		bstp_timer_stop(&bif->bif_hold_timer);
-		bstp_timer_stop(&bif->bif_message_age_timer);
-		bstp_timer_stop(&bif->bif_forward_delay_timer);
+	LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+		if (bp2 == bp)
+			continue;
+		bp2->bp_tc_prop = 1;
 	}
+}
 
-	callout_stop(&sc->sc_bstpcallout);
+static void
+bstp_set_all_reroot(struct bstp_state *bs)
+{
+	struct bstp_port *bp;
 
-	bstp_timer_stop(&sc->sc_topology_change_timer);
-	bstp_timer_stop(&sc->sc_tcn_timer);
-	bstp_timer_stop(&sc->sc_hello_timer);
+	BSTP_LOCK_ASSERT(bs);
 
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+		bp->bp_reroot = 1;
 }
 
 static void
-bstp_initialize_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+bstp_set_all_sync(struct bstp_state *bs)
 {
-	bstp_become_designated_port(sc, bif);
-	bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING);
-	bif->bif_topology_change_acknowledge = 0;
-	bif->bif_config_pending = 0;
-	bif->bif_change_detection_enabled = 1;
-	bstp_timer_stop(&bif->bif_message_age_timer);
-	bstp_timer_stop(&bif->bif_forward_delay_timer);
-	bstp_timer_stop(&bif->bif_hold_timer);
+	struct bstp_port *bp;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		bp->bp_sync = 1;
+		bp->bp_synced = 0;	/* Not explicit in spec */
+	}
+
+	bs->bs_allsynced = 0;
 }
 
 static void
-bstp_enable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+bstp_set_port_state(struct bstp_port *bp, int state)
 {
-	bstp_initialize_port(sc, bif);
-	bstp_port_state_selection(sc);
+	if (bp->bp_state == state)
+		return;
+
+	bp->bp_state = state;
+
+	switch (bp->bp_state) {
+		case BSTP_IFSTATE_DISCARDING:
+			DPRINTF("state changed to DISCARDING on %s\n",
+			    bp->bp_ifp->if_xname);
+			break;
+
+		case BSTP_IFSTATE_LEARNING:
+			DPRINTF("state changed to LEARNING on %s\n",
+			    bp->bp_ifp->if_xname);
+
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_protover == BSTP_PROTO_RSTP ?
+			    bp->bp_desg_htime : bp->bp_desg_fdelay);
+			break;
+
+		case BSTP_IFSTATE_FORWARDING:
+			DPRINTF("state changed to FORWARDING on %s\n",
+			    bp->bp_ifp->if_xname);
+
+			bstp_timer_stop(&bp->bp_forward_delay_timer);
+			/* Record that we enabled forwarding */
+			bp->bp_forward_transitions++;
+			break;
+	}
+
+	/* notify the parent bridge */
+	taskqueue_enqueue(taskqueue_swi, &bp->bp_statetask);
 }
 
 static void
-bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
+bstp_set_port_role(struct bstp_port *bp, int role)
 {
-	int root;
+	struct bstp_state *bs = bp->bp_bs;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	if (bp->bp_role == role)
+		return;
 
-	root = bstp_root_bridge(sc);
-	bstp_become_designated_port(sc, bif);
-	bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
-	bif->bif_topology_change_acknowledge = 0;
-	bif->bif_config_pending = 0;
-	bstp_timer_stop(&bif->bif_message_age_timer);
-	bstp_timer_stop(&bif->bif_forward_delay_timer);
-	bstp_configuration_update(sc);
-	bstp_port_state_selection(sc);
-	bridge_rtdelete(sc, bif->bif_ifp, IFBF_FLUSHDYN);
+	/* perform pre-change tasks */
+	switch (bp->bp_role) {
+		case BSTP_ROLE_DISABLED:
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_desg_max_age);
+			break;
 
-	if (bstp_root_bridge(sc) && (root == 0)) {
-		sc->sc_max_age = sc->sc_bridge_max_age;
-		sc->sc_hello_time = sc->sc_bridge_hello_time;
-		sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+		case BSTP_ROLE_BACKUP:
+			bstp_timer_start(&bp->bp_recent_backup_timer,
+			    bp->bp_desg_htime * 2);
+			/* fall through */
+		case BSTP_ROLE_ALTERNATE:
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_desg_fdelay);
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+			break;
 
-		bstp_topology_change_detection(sc);
-		bstp_timer_stop(&sc->sc_tcn_timer);
-		bstp_config_bpdu_generation(sc);
-		bstp_timer_start(&sc->sc_hello_timer, 0);
+		case BSTP_ROLE_ROOT:
+			bstp_timer_start(&bp->bp_recent_root_timer,
+			    BSTP_DEFAULT_FORWARD_DELAY);
+			break;
 	}
+
+	bp->bp_role = role;
+	/* clear values not carried between roles */
+	bp->bp_proposing = 0;
+	bs->bs_allsynced = 0;
+
+	/* initialise the new role */
+	switch (bp->bp_role) {
+		case BSTP_ROLE_DISABLED:
+		case BSTP_ROLE_ALTERNATE:
+		case BSTP_ROLE_BACKUP:
+			DPRINTF("%s role -> ALT/BACK/DISABLED\n",
+			    bp->bp_ifp->if_xname);
+			bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+			bstp_timer_stop(&bp->bp_recent_root_timer);
+			bstp_timer_latch(&bp->bp_forward_delay_timer);
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+			break;
+
+		case BSTP_ROLE_ROOT:
+			DPRINTF("%s role -> ROOT\n",
+			    bp->bp_ifp->if_xname);
+			bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+			bstp_timer_latch(&bp->bp_recent_root_timer);
+			bp->bp_proposing = 0;
+			break;
+
+		case BSTP_ROLE_DESIGNATED:
+			DPRINTF("%s role -> DESIGNATED\n",
+			    bp->bp_ifp->if_xname);
+			bstp_timer_start(&bp->bp_hello_timer,
+			    bp->bp_desg_htime);
+			bp->bp_agree = 0;
+			break;
+	}
+
+	/* let the TC state know that the role changed */
+	bstp_update_tc(bp);
 }
 
-#ifdef notused
 static void
-bstp_set_bridge_priority(struct bridge_softc *sc, uint64_t new_bridge_id)
+bstp_set_port_proto(struct bstp_port *bp, int proto)
 {
-	struct bridge_iflist *bif;
-	int root;
+	struct bstp_state *bs = bp->bp_bs;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	/* supported protocol versions */
+	switch (proto) {
+		case BSTP_PROTO_STP:
+			/* we can downgrade protocols only */
+			bstp_timer_stop(&bp->bp_migrate_delay_timer);
+			/* clear unsupported features */
+			bp->bp_operedge = 0;
+			/* STP compat mode only uses 16 bits of the 32 */
+			if (bp->bp_path_cost > 65535)
+				bp->bp_path_cost = 65535;
+			break;
 
-	root = bstp_root_bridge(sc);
+		case BSTP_PROTO_RSTP:
+			bstp_timer_start(&bp->bp_migrate_delay_timer,
+			    bs->bs_migration_delay);
+			break;
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bstp_designated_port(sc, bif))
-			bif->bif_designated_bridge = new_bridge_id;
+		default:
+			DPRINTF("Unsupported STP version %d\n", proto);
+			return;
 	}
 
-	sc->sc_bridge_id = new_bridge_id;
+	bp->bp_protover = proto;
+	bp->bp_flags &= ~BSTP_PORT_CANMIGRATE;
+}
+
+static void
+bstp_set_port_tc(struct bstp_port *bp, int state)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	bp->bp_tcstate = state;
+
+	/* initialise the new state */
+	switch (bp->bp_tcstate) {
+		case BSTP_TCSTATE_ACTIVE:
+			DPRINTF("%s -> TC_ACTIVE\n", bp->bp_ifp->if_xname);
+			/* nothing to do */
+			break;
+
+		case BSTP_TCSTATE_INACTIVE:
+			bstp_timer_stop(&bp->bp_tc_timer);
+			/* flush routes on the parent bridge */
+			bp->bp_fdbflush = 1;
+			taskqueue_enqueue(taskqueue_swi, &bp->bp_rtagetask);
+			bp->bp_tc_ack = 0;
+			DPRINTF("%s -> TC_INACTIVE\n", bp->bp_ifp->if_xname);
+			break;
+
+		case BSTP_TCSTATE_LEARNING:
+			bp->bp_rcvdtc = 0;
+			bp->bp_rcvdtcn = 0;
+			bp->bp_rcvdtca = 0;
+			bp->bp_tc_prop = 0;
+			DPRINTF("%s -> TC_LEARNING\n", bp->bp_ifp->if_xname);
+			break;
 
-	bstp_configuration_update(sc);
-	bstp_port_state_selection(sc);
+		case BSTP_TCSTATE_DETECTED:
+			bstp_set_timer_tc(bp);
+			bstp_set_other_tcprop(bp);
+			/* send out notification */
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			bstp_transmit(bs, bp);
+			getmicrotime(&bs->bs_last_tc_time);
+			DPRINTF("%s -> TC_DETECTED\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
 
-	if (bstp_root_bridge(sc) && (root == 0)) {
-		sc->sc_max_age = sc->sc_bridge_max_age;
-		sc->sc_hello_time = sc->sc_bridge_hello_time;
-		sc->sc_forward_delay = sc->sc_bridge_forward_delay;
+		case BSTP_TCSTATE_TCN:
+			bstp_set_timer_tc(bp);
+			DPRINTF("%s -> TC_TCN\n", bp->bp_ifp->if_xname);
+			/* fall through */
+		case BSTP_TCSTATE_TC:
+			bp->bp_rcvdtc = 0;
+			bp->bp_rcvdtcn = 0;
+			if (bp->bp_role == BSTP_ROLE_DESIGNATED)
+				bp->bp_tc_ack = 1;
+
+			bstp_set_other_tcprop(bp);
+			DPRINTF("%s -> TC_TC\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
 
-		bstp_topology_change_detection(sc);
-		bstp_timer_stop(&sc->sc_tcn_timer);
-		bstp_config_bpdu_generation(sc);
-		bstp_timer_start(&sc->sc_hello_timer, 0);
+		case BSTP_TCSTATE_PROPAG:
+			/* flush routes on the parent bridge */
+			bp->bp_fdbflush = 1;
+			taskqueue_enqueue(taskqueue_swi, &bp->bp_rtagetask);
+			bp->bp_tc_prop = 0;
+			bstp_set_timer_tc(bp);
+			DPRINTF("%s -> TC_PROPAG\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
+
+		case BSTP_TCSTATE_ACK:
+			bstp_timer_stop(&bp->bp_tc_timer);
+			bp->bp_rcvdtca = 0;
+			DPRINTF("%s -> TC_ACK\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
 	}
 }
 
 static void
-bstp_set_port_priority(struct bridge_softc *sc, struct bridge_iflist *bif,
-    uint16_t new_port_id)
+bstp_set_timer_tc(struct bstp_port *bp)
 {
-	if (bstp_designated_port(sc, bif))
-		bif->bif_designated_port = new_port_id;
+	struct bstp_state *bs = bp->bp_bs;
 
-	bif->bif_port_id = new_port_id;
+	if (bp->bp_tc_timer.active)
+		return;
 
-	if ((sc->sc_bridge_id == bif->bif_designated_bridge) &&
-	    (bif->bif_port_id < bif->bif_designated_port)) {
-		bstp_become_designated_port(sc, bif);
-		bstp_port_state_selection(sc);
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_RSTP:
+			bstp_timer_start(&bp->bp_tc_timer,
+			    bp->bp_desg_htime + BSTP_TICK_VAL);
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			break;
+
+		case BSTP_PROTO_STP:
+			bstp_timer_start(&bp->bp_tc_timer,
+			    bs->bs_root_max_age + bs->bs_root_fdelay);
+			break;
 	}
 }
 
 static void
-bstp_set_path_cost(struct bridge_softc *sc, struct bridge_iflist *bif,
-    uint32_t path_cost)
+bstp_set_timer_msgage(struct bstp_port *bp)
+{
+	if (bp->bp_port_msg_age + BSTP_MESSAGE_AGE_INCR <=
+	    bp->bp_port_max_age) {
+		bstp_timer_start(&bp->bp_message_age_timer,
+		    bp->bp_port_htime * 3);
+	} else
+		/* expires immediately */
+		bstp_timer_start(&bp->bp_message_age_timer, 0);
+}
+
+static int
+bstp_rerooted(struct bstp_state *bs, struct bstp_port *bp)
 {
-	bif->bif_path_cost = path_cost;
-	bstp_configuration_update(sc);
-	bstp_port_state_selection(sc);
+	struct bstp_port *bp2;
+	int rr_set = 0;
+
+	LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+		if (bp2 == bp)
+			continue;
+		if (bp2->bp_recent_root_timer.active) {
+			rr_set = 1;
+			break;
+		}
+	}
+	return (!rr_set);
+}
+
+int
+bstp_set_htime(struct bstp_state *bs, int t)
+{
+	/* convert seconds to ticks */
+	t *= BSTP_TICK_VAL;
+
+	/* value can only be changed in legacy STP mode */
+	if (bs->bs_protover != BSTP_PROTO_STP)
+		return (EPERM);
+
+	if (t < BSTP_MIN_HELLO_TIME || t > BSTP_MAX_HELLO_TIME)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_htime = t;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
 }
 
+int
+bstp_set_fdelay(struct bstp_state *bs, int t)
+{
+	/* convert seconds to ticks */
+	t *= BSTP_TICK_VAL;
+
+	if (t < BSTP_MIN_FORWARD_DELAY || t > BSTP_MAX_FORWARD_DELAY)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_fdelay = t;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_maxage(struct bstp_state *bs, int t)
+{
+	/* convert seconds to ticks */
+	t *= BSTP_TICK_VAL;
+
+	if (t < BSTP_MIN_MAX_AGE || t > BSTP_MAX_MAX_AGE)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_max_age = t;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_holdcount(struct bstp_state *bs, int count)
+{
+	struct bstp_port *bp;
+
+	if (count < BSTP_MIN_HOLD_COUNT ||
+	    count > BSTP_MAX_HOLD_COUNT)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_txholdcount = count;
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+		bp->bp_txcount = 0;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_protocol(struct bstp_state *bs, int proto)
+{
+	struct bstp_port *bp;
+
+	switch (proto) {
+		/* Supported protocol versions */
+		case BSTP_PROTO_STP:
+		case BSTP_PROTO_RSTP:
+			break;
+
+		default:
+			return (EINVAL);
+	}
+
+	BSTP_LOCK(bs);
+	bs->bs_protover = proto;
+	bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME;
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		/* reinit state */
+		bp->bp_infois = BSTP_INFO_DISABLED;
+		bp->bp_txcount = 0;
+		bstp_set_port_proto(bp, bs->bs_protover);
+		bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+		bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+		bstp_timer_stop(&bp->bp_recent_backup_timer);
+	}
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_priority(struct bstp_state *bs, int pri)
+{
+	if (pri < 0 || pri > BSTP_MAX_PRIORITY)
+		return (EINVAL);
+
+	/* Limit to steps of 4096 */
+	pri -= pri % 4096;
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_priority = pri;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_port_priority(struct bstp_port *bp, int pri)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (pri < 0 || pri > BSTP_MAX_PORT_PRIORITY)
+		return (EINVAL);
+
+	/* Limit to steps of 16 */
+	pri -= pri % 16;
+
+	BSTP_LOCK(bs);
+	bp->bp_priority = pri;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
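
As a quick illustration of the rounding above (not part of this commit), the
following standalone userland snippet shows how a requested priority is limited
to the 802.1D steps: 4096 for the bridge priority and 16 for the port priority.
The helper name round_priority() is invented for the example.

#include <stdio.h>

/* Round a requested priority down to the step size used by
 * bstp_set_priority() (4096) and bstp_set_port_priority() (16). */
static int
round_priority(int pri, int step)
{

	return (pri - pri % step);
}

int
main(void)
{

	/* a requested bridge priority of 32769 is stored as 32768 */
	printf("bridge: %d -> %d\n", 32769, round_priority(32769, 4096));
	/* a requested port priority of 130 is stored as 128 */
	printf("port:   %d -> %d\n", 130, round_priority(130, 16));
	return (0);
}
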
+int
+bstp_set_path_cost(struct bstp_port *bp, uint32_t path_cost)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (path_cost > BSTP_MAX_PATH_COST)
+		return (EINVAL);
+
+	/* STP compat mode only uses 16 bits of the 32 */
+	if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535)
+		path_cost = 65535;
+
+	BSTP_LOCK(bs);
+
+	if (path_cost == 0) {	/* use auto */
+		bp->bp_flags &= ~BSTP_PORT_ADMCOST;
+		bp->bp_path_cost = bstp_calc_path_cost(bp);
+	} else {
+		bp->bp_path_cost = path_cost;
+		bp->bp_flags |= BSTP_PORT_ADMCOST;
+	}
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_edge(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	if ((bp->bp_operedge = set) == 0)
+		bp->bp_flags &= ~BSTP_PORT_ADMEDGE;
+	else
+		bp->bp_flags |= BSTP_PORT_ADMEDGE;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_autoedge(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	if (set) {
+		bp->bp_flags |= BSTP_PORT_AUTOEDGE;
+		/* we may be able to transition straight to edge */
+		if (bp->bp_edge_delay_timer.active == 0)
+			bstp_edge_delay_expiry(bs, bp);
+	} else
+		bp->bp_flags &= ~BSTP_PORT_AUTOEDGE;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_ptp(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	bp->bp_ptp_link = set;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_autoptp(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	if (set) {
+		bp->bp_flags |= BSTP_PORT_AUTOPTP;
+		if (bp->bp_role != BSTP_ROLE_DISABLED)
+			bstp_ifupdstatus(bs, bp);
+	} else
+		bp->bp_flags &= ~BSTP_PORT_AUTOPTP;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+/*
+ * Calculate the path cost according to the link speed.
+ */
+static uint32_t
+bstp_calc_path_cost(struct bstp_port *bp)
+{
+	struct ifnet *ifp = bp->bp_ifp;
+	uint32_t path_cost;
+
+	/* If the path cost has been manually set then retain the value */
+	if (bp->bp_flags & BSTP_PORT_ADMCOST)
+		return bp->bp_path_cost;
+
+	if (ifp->if_link_state == LINK_STATE_DOWN) {
+		/* Recalc when the link comes up again */
+		bp->bp_flags |= BSTP_PORT_PNDCOST;
+		return (BSTP_DEFAULT_PATH_COST);
+	}
+
+	if (ifp->if_baudrate < 1000)
+		return (BSTP_DEFAULT_PATH_COST);
+
+ 	/* formula from section 17.14, IEEE Std 802.1D-2004 */
+	path_cost = 20000000000ULL / (ifp->if_baudrate / 1000);
+
+	if (path_cost > BSTP_MAX_PATH_COST)
+		path_cost = BSTP_MAX_PATH_COST;
+
+	/* STP compat mode only uses 16 bits of the 32 */
+	if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535)
+		path_cost = 65535;
+
+	return (path_cost);
+}
+
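
For reference, here is a minimal userland sketch of the clause 17.14 formula
used by bstp_calc_path_cost() above. The 200000000 ceiling is the 802.1D-2004
maximum and is assumed to match BSTP_MAX_PATH_COST; the link-down and slow-link
fallback to BSTP_DEFAULT_PATH_COST is only stubbed out here.

#include <stdio.h>
#include <stdint.h>

#define MAX_PATH_COST	200000000	/* 802.1D-2004 maximum */

/* cost = 20,000,000,000 / (link speed in kb/s), clamped to the maximum */
static uint32_t
calc_path_cost(uint64_t baudrate)
{
	uint64_t cost;

	if (baudrate < 1000)
		return (MAX_PATH_COST);	/* kernel uses BSTP_DEFAULT_PATH_COST */
	cost = 20000000000ULL / (baudrate / 1000);
	if (cost > MAX_PATH_COST)
		cost = MAX_PATH_COST;
	return ((uint32_t)cost);
}

int
main(void)
{

	/* 10Mb/s -> 2000000, 100Mb/s -> 200000, 1Gb/s -> 20000, 10Gb/s -> 2000 */
	printf("%u %u %u %u\n",
	    calc_path_cost(10000000ULL), calc_path_cost(100000000ULL),
	    calc_path_cost(1000000000ULL), calc_path_cost(10000000000ULL));
	return (0);
}
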
+/*
+ * Notify the bridge that a port state has changed; this is deferred to a
+ * taskqueue to avoid a lock order reversal (LOR).
+ */
 static void
-bstp_enable_change_detection(struct bridge_iflist *bif)
+bstp_notify_state(void *arg, int pending)
 {
-	bif->bif_change_detection_enabled = 1;
+	struct bstp_port *bp = (struct bstp_port *)arg;
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (bp->bp_active == 1 && bs->bs_state_cb != NULL)
+		(*bs->bs_state_cb)(bp->bp_ifp, bp->bp_state);
 }
 
+/*
+ * Flush the routes learned on the bridge port; this is deferred to a
+ * taskqueue to avoid a lock order reversal (LOR).
+ */
 static void
-bstp_disable_change_detection(struct bridge_iflist *bif)
+bstp_notify_rtage(void *arg, int pending)
 {
-	bif->bif_change_detection_enabled = 0;
+	struct bstp_port *bp = (struct bstp_port *)arg;
+	struct bstp_state *bs = bp->bp_bs;
+	int age = 0;
+
+	BSTP_LOCK(bs);
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_STP:
+			/* convert to seconds */
+			age = bp->bp_desg_fdelay / BSTP_TICK_VAL;
+			break;
+
+		case BSTP_PROTO_RSTP:
+			age = 0;
+			break;
+	}
+	BSTP_UNLOCK(bs);
+
+	if (bp->bp_active == 1 && bs->bs_rtage_cb != NULL)
+		(*bs->bs_rtage_cb)(bp->bp_ifp, age);
+
+	/* flush is complete */
+	BSTP_LOCK(bs);
+	bp->bp_fdbflush = 0;
+	BSTP_UNLOCK(bs);
 }
-#endif /* notused */
 
 void
 bstp_linkstate(struct ifnet *ifp, int state)
 {
-	struct bridge_softc *sc;
-	struct bridge_iflist *bif;
-
-	sc = ifp->if_bridge;
-	BRIDGE_LOCK(sc);
+	struct bstp_state *bs;
+	struct bstp_port *bp;
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-
-		if (bif->bif_ifp == ifp) {
-			bstp_ifupdstatus(sc, bif);
-			break;
+	/* search for the stp port */
+	mtx_lock(&bstp_list_mtx);
+	LIST_FOREACH(bs, &bstp_list, bs_list) {
+		BSTP_LOCK(bs);
+		LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+			if (bp->bp_ifp == ifp) {
+				bstp_ifupdstatus(bs, bp);
+				bstp_update_state(bs, bp);
+				/* it only exists once so return */
+				BSTP_UNLOCK(bs);
+				mtx_unlock(&bstp_list_mtx);
+				return;
+			}
 		}
+		BSTP_UNLOCK(bs);
 	}
-
-	BRIDGE_UNLOCK(sc);
+	mtx_unlock(&bstp_list_mtx);
 }
 
 static void
-bstp_ifupdstatus(struct bridge_softc *sc, struct bridge_iflist *bif)
+bstp_ifupdstatus(struct bstp_state *bs, struct bstp_port *bp)
 {
-	struct ifnet *ifp = bif->bif_ifp;
+	struct ifnet *ifp = bp->bp_ifp;
 	struct ifmediareq ifmr;
 	int error = 0;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	BSTP_LOCK_ASSERT(bs);
 
 	bzero((char *)&ifmr, sizeof(ifmr));
 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
 
 	if ((error == 0) && (ifp->if_flags & IFF_UP)) {
-	 	if (ifmr.ifm_status & IFM_ACTIVE) {
-			if (bif->bif_state == BSTP_IFSTATE_DISABLED)
-				bstp_enable_port(sc, bif);
+		if (ifmr.ifm_status & IFM_ACTIVE) {
+			/* A full-duplex link is assumed to be point to point */
+			if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
+				bp->bp_ptp_link =
+				    ifmr.ifm_active & IFM_FDX ? 1 : 0;
+			}
 
+			/* Calc the cost if the link was down previously */
+			if (bp->bp_flags & BSTP_PORT_PNDCOST) {
+				bp->bp_path_cost = bstp_calc_path_cost(bp);
+				bp->bp_flags &= ~BSTP_PORT_PNDCOST;
+			}
+
+			if (bp->bp_role == BSTP_ROLE_DISABLED)
+				bstp_enable_port(bs, bp);
 		} else {
-			if (bif->bif_state != BSTP_IFSTATE_DISABLED)
-				bstp_disable_port(sc, bif);
+			if (bp->bp_role != BSTP_ROLE_DISABLED) {
+				bstp_disable_port(bs, bp);
+				if ((bp->bp_flags & BSTP_PORT_ADMEDGE) &&
+				    bp->bp_protover == BSTP_PROTO_RSTP)
+					bp->bp_operedge = 1;
+			}
 		}
 		return;
 	}
 
-	if (bif->bif_state != BSTP_IFSTATE_DISABLED)
-		bstp_disable_port(sc, bif);
+	if (bp->bp_infois != BSTP_INFO_DISABLED)
+		bstp_disable_port(bs, bp);
+}
+
+static void
+bstp_enable_port(struct bstp_state *bs, struct bstp_port *bp)
+{
+	bp->bp_infois = BSTP_INFO_AGED;
+	bstp_assign_roles(bs);
+}
+
+static void
+bstp_disable_port(struct bstp_state *bs, struct bstp_port *bp)
+{
+	bp->bp_infois = BSTP_INFO_DISABLED;
+	bstp_assign_roles(bs);
 }
 
 static void
 bstp_tick(void *arg)
 {
-	struct bridge_softc *sc = arg;
-	struct bridge_iflist *bif;
+	struct bstp_state *bs = arg;
+	struct bstp_port *bp;
 
-	BRIDGE_LOCK_ASSERT(sc);
+	BSTP_LOCK_ASSERT(bs);
+
+	if (bs->bs_running == 0)
+		return;
 
 	/* slow timer to catch missed link events */
-	if (bstp_timer_expired(&sc->sc_link_timer, BSTP_LINK_TIMER)) {
-		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-			if ((bif->bif_flags & IFBIF_STP) == 0)
-				continue;
-			bstp_ifupdstatus(sc, bif);
-		}
-		bstp_timer_start(&sc->sc_link_timer, 0);
+	if (bstp_timer_expired(&bs->bs_link_timer)) {
+		LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+			bstp_ifupdstatus(bs, bp);
+		bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
 	}
 
-	if (bstp_timer_expired(&sc->sc_hello_timer, sc->sc_hello_time))
-		bstp_hello_timer_expiry(sc);
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		/* no events need to happen for these */
+		bstp_timer_expired(&bp->bp_tc_timer);
+		bstp_timer_expired(&bp->bp_recent_root_timer);
+		bstp_timer_expired(&bp->bp_forward_delay_timer);
+		bstp_timer_expired(&bp->bp_recent_backup_timer);
 
-	if (bstp_timer_expired(&sc->sc_tcn_timer, sc->sc_bridge_hello_time))
-		bstp_tcn_timer_expiry(sc);
+		if (bstp_timer_expired(&bp->bp_hello_timer))
+			bstp_hello_timer_expiry(bs, bp);
 
-	if (bstp_timer_expired(&sc->sc_topology_change_timer,
-	    sc->sc_topology_change_time))
-		bstp_topology_change_timer_expiry(sc);
+		if (bstp_timer_expired(&bp->bp_message_age_timer))
+			bstp_message_age_expiry(bs, bp);
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bstp_timer_expired(&bif->bif_message_age_timer,
-		    sc->sc_max_age))
-			bstp_message_age_timer_expiry(sc, bif);
-	}
+		if (bstp_timer_expired(&bp->bp_migrate_delay_timer))
+			bstp_migrate_delay_expiry(bs, bp);
 
-	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
-		if ((bif->bif_flags & IFBIF_STP) == 0)
-			continue;
-		if (bstp_timer_expired(&bif->bif_forward_delay_timer,
-		    sc->sc_forward_delay))
-			bstp_forward_delay_timer_expiry(sc, bif);
+		if (bstp_timer_expired(&bp->bp_edge_delay_timer))
+			bstp_edge_delay_expiry(bs, bp);
+
+		/* update the various state machines for the port */
+		bstp_update_state(bs, bp);
 
-		if (bstp_timer_expired(&bif->bif_hold_timer,
-		    sc->sc_hold_time))
-			bstp_hold_timer_expiry(sc, bif);
+		if (bp->bp_txcount > 0)
+			bp->bp_txcount--;
 	}
 
-	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
-		callout_reset(&sc->sc_bstpcallout, hz, bstp_tick, sc);
+	callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
 }
 
 static void
-bstp_timer_start(struct bridge_timer *t, uint16_t v)
+bstp_timer_start(struct bstp_timer *t, uint16_t v)
 {
 	t->value = v;
 	t->active = 1;
+	t->latched = 0;
 }
 
 static void
-bstp_timer_stop(struct bridge_timer *t)
+bstp_timer_stop(struct bstp_timer *t)
 {
 	t->value = 0;
 	t->active = 0;
+	t->latched = 0;
+}
+
+static void
+bstp_timer_latch(struct bstp_timer *t)
+{
+	t->latched = 1;
+	t->active = 1;
 }
 
 static int
-bstp_timer_expired(struct bridge_timer *t, uint16_t v)
+bstp_timer_expired(struct bstp_timer *t)
 {
-	if (t->active == 0)
+	if (t->active == 0 || t->latched)
 		return (0);
-	t->value += BSTP_TICK_VAL;
-	if (t->value >= v) {
+	t->value -= BSTP_TICK_VAL;
+	if (t->value <= 0) {
 		bstp_timer_stop(t);
 		return (1);
 	}
 	return (0);
+}
+
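
The timers now count down from a preloaded value instead of counting up toward
a limit, and a latched timer never expires. A small standalone simulation of
that behaviour follows; the names and the one-unit tick are invented for the
example.

#include <stdio.h>

#define TICK	1		/* stand-in for BSTP_TICK_VAL */

struct sim_timer {
	int	value;
	int	active;
	int	latched;
};

/* Mirrors the countdown logic of bstp_timer_expired() above. */
static int
timer_expired(struct sim_timer *t)
{

	if (t->active == 0 || t->latched)
		return (0);
	t->value -= TICK;
	if (t->value <= 0) {
		t->active = 0;
		return (1);
	}
	return (0);
}

int
main(void)
{
	struct sim_timer t = { 3, 1, 0 };
	int i;

	/* fires on the third tick, then stays inactive */
	for (i = 1; i <= 4; i++)
		printf("tick %d: expired=%d\n", i, timer_expired(&t));
	return (0);
}
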
+static void
+bstp_hello_timer_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+	if ((bp->bp_flags & BSTP_PORT_NEWINFO) ||
+	    bp->bp_role == BSTP_ROLE_DESIGNATED ||
+	    (bp->bp_role == BSTP_ROLE_ROOT &&
+	     bp->bp_tc_timer.active == 1)) {
+		bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime);
+		bp->bp_flags |= BSTP_PORT_NEWINFO;
+		bstp_transmit(bs, bp);
+	}
+}
+
+static void
+bstp_message_age_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+	if (bp->bp_infois == BSTP_INFO_RECIEVED) {
+		bp->bp_infois = BSTP_INFO_AGED;
+		bstp_assign_roles(bs);
+		DPRINTF("aged info on %s\n", bp->bp_ifp->if_xname);
+	}
+}
+
+static void
+bstp_migrate_delay_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+	bp->bp_flags |= BSTP_PORT_CANMIGRATE;
+}
+
+static void
+bstp_edge_delay_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+	if ((bp->bp_flags & BSTP_PORT_AUTOEDGE) &&
+	    bp->bp_protover == BSTP_PROTO_RSTP && bp->bp_proposing &&
+	    bp->bp_role == BSTP_ROLE_DESIGNATED) {
+		bp->bp_operedge = 1;
+		DPRINTF("%s -> edge port\n", bp->bp_ifp->if_xname);
+	}
+}
+
+static int
+bstp_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+	int i, d;
+
+	for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
+		d = ((int)a[i]) - ((int)b[i]);
+	}
 
+	return (d);
+}
+
+/*
+ * compare the bridge address component of the bridgeid
+ */
+static int
+bstp_same_bridgeid(uint64_t id1, uint64_t id2)
+{
+	u_char addr1[ETHER_ADDR_LEN];
+	u_char addr2[ETHER_ADDR_LEN];
+
+	PV2ADDR(id1, addr1);
+	PV2ADDR(id2, addr2);
+
+	if (bstp_addr_cmp(addr1, addr2) == 0)
+		return (1);
+
+	return (0);
+}
+
+void
+bstp_reinit(struct bstp_state *bs)
+{
+	struct bstp_port *bp;
+	struct ifnet *ifp, *mif;
+	u_char *e_addr;
+	static const u_char llzero[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
+
+	BSTP_LOCK_ASSERT(bs);
+
+	mif = NULL;
+	/*
+	 * Search through the Ethernet adapters and find the one with the
+	 * numerically lowest MAC address.  The adapter we take the MAC address
+	 * from does not need to be a member of the bridge; it just needs to
+	 * provide a unique value.
+	 */
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &ifnet, if_link) {
+		if (ifp->if_type != IFT_ETHER)
+			continue;
+
+		if (bstp_addr_cmp(IF_LLADDR(ifp), llzero) == 0)
+			continue;
+
+		if (mif == NULL) {
+			mif = ifp;
+			continue;
+		}
+		if (bstp_addr_cmp(IF_LLADDR(ifp), IF_LLADDR(mif)) < 0) {
+			mif = ifp;
+			continue;
+		}
+	}
+	IFNET_RUNLOCK();
+
+	if (LIST_EMPTY(&bs->bs_bplist) || mif == NULL) {
+		/* Set the bridge and root id (lower bits) to zero */
+		bs->bs_bridge_pv.pv_dbridge_id =
+		    ((uint64_t)bs->bs_bridge_priority) << 48;
+		bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+		bs->bs_root_pv = bs->bs_bridge_pv;
+		/* Disable any remaining ports; they will have no MAC address */
+		LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+			bp->bp_infois = BSTP_INFO_DISABLED;
+			bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+		}
+		callout_stop(&bs->bs_bstpcallout);
+		return;
+	}
+
+	e_addr = IF_LLADDR(mif);
+	bs->bs_bridge_pv.pv_dbridge_id =
+	    (((uint64_t)bs->bs_bridge_priority) << 48) |
+	    (((uint64_t)e_addr[0]) << 40) |
+	    (((uint64_t)e_addr[1]) << 32) |
+	    (((uint64_t)e_addr[2]) << 24) |
+	    (((uint64_t)e_addr[3]) << 16) |
+	    (((uint64_t)e_addr[4]) << 8) |
+	    (((uint64_t)e_addr[5]));
+
+	bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+	bs->bs_bridge_pv.pv_cost = 0;
+	bs->bs_bridge_pv.pv_dport_id = 0;
+	bs->bs_bridge_pv.pv_port_id = 0;
+
+	if (bs->bs_running && callout_pending(&bs->bs_bstpcallout) == 0)
+		callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		bp->bp_port_id = (bp->bp_priority << 8) |
+		    (bp->bp_ifp->if_index  & 0xfff);
+		bstp_ifupdstatus(bs, bp);
+	}
+
+	bstp_assign_roles(bs);
+	bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
+}
+
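
The 64-bit bridge ID assembled in bstp_reinit() above packs the 16-bit bridge
priority into the top bits and the 48-bit MAC address into the low bits. A
standalone sketch of the same layout (the function name and sample MAC are
invented):

#include <stdio.h>
#include <stdint.h>

/* Pack priority<<48 | 48-bit MAC, as bstp_reinit() does above. */
static uint64_t
make_bridge_id(uint16_t priority, const uint8_t mac[6])
{
	uint64_t id;
	int i;

	id = (uint64_t)priority << 48;
	for (i = 0; i < 6; i++)
		id |= (uint64_t)mac[i] << (40 - 8 * i);
	return (id);
}

int
main(void)
{
	const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	/* the 802.1D default priority 32768 gives 0x8000001122334455 */
	printf("0x%016llx\n",
	    (unsigned long long)make_bridge_id(32768, mac));
	return (0);
}
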
+static int
+bstp_modevent(module_t mod, int type, void *data)
+{
+	switch (type) {
+	case MOD_LOAD:
+		mtx_init(&bstp_list_mtx, "bridgestp list", NULL, MTX_DEF);
+		LIST_INIT(&bstp_list);
+		bstp_linkstate_p = bstp_linkstate;
+		break;
+	case MOD_UNLOAD:
+		bstp_linkstate_p = NULL;
+		mtx_destroy(&bstp_list_mtx);
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t bstp_mod = {
+	"bridgestp",
+	bstp_modevent,
+	0
+};
+
+DECLARE_MODULE(bridgestp, bstp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(bridgestp, 1);
+
+void
+bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb)
+{
+	BSTP_LOCK_INIT(bs);
+	callout_init_mtx(&bs->bs_bstpcallout, &bs->bs_mtx, 0);
+	LIST_INIT(&bs->bs_bplist);
+
+	bs->bs_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
+	bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME;
+	bs->bs_bridge_fdelay = BSTP_DEFAULT_FORWARD_DELAY;
+	bs->bs_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
+	bs->bs_hold_time = BSTP_DEFAULT_HOLD_TIME;
+	bs->bs_migration_delay = BSTP_DEFAULT_MIGRATE_DELAY;
+	bs->bs_txholdcount = BSTP_DEFAULT_HOLD_COUNT;
+	bs->bs_protover = BSTP_PROTO_RSTP;
+	bs->bs_state_cb = cb->bcb_state;
+	bs->bs_rtage_cb = cb->bcb_rtage;
+
+	getmicrotime(&bs->bs_last_tc_time);
+
+	mtx_lock(&bstp_list_mtx);
+	LIST_INSERT_HEAD(&bstp_list, bs, bs_list);
+	mtx_unlock(&bstp_list_mtx);
+}
+
+void
+bstp_detach(struct bstp_state *bs)
+{
+	KASSERT(LIST_EMPTY(&bs->bs_bplist), ("bstp still active"));
+
+	mtx_lock(&bstp_list_mtx);
+	LIST_REMOVE(bs, bs_list);
+	mtx_unlock(&bstp_list_mtx);
+	callout_drain(&bs->bs_bstpcallout);
+	BSTP_LOCK_DESTROY(bs);
+}
+
+void
+bstp_init(struct bstp_state *bs)
+{
+	BSTP_LOCK(bs);
+	callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
+	bs->bs_running = 1;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+}
+
+void
+bstp_stop(struct bstp_state *bs)
+{
+	struct bstp_port *bp;
+
+	BSTP_LOCK(bs);
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+		bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+
+	bs->bs_running = 0;
+	callout_stop(&bs->bs_bstpcallout);
+	BSTP_UNLOCK(bs);
+}
+
+int
+bstp_create(struct bstp_state *bs, struct bstp_port *bp, struct ifnet *ifp)
+{
+	bzero(bp, sizeof(struct bstp_port));
+
+	BSTP_LOCK(bs);
+	bp->bp_ifp = ifp;
+	bp->bp_bs = bs;
+	bp->bp_priority = BSTP_DEFAULT_PORT_PRIORITY;
+	TASK_INIT(&bp->bp_statetask, 0, bstp_notify_state, bp);
+	TASK_INIT(&bp->bp_rtagetask, 0, bstp_notify_rtage, bp);
+
+	/* Init state */
+	bp->bp_infois = BSTP_INFO_DISABLED;
+	bp->bp_flags = BSTP_PORT_AUTOEDGE|BSTP_PORT_AUTOPTP;
+	bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+	bstp_set_port_proto(bp, bs->bs_protover);
+	bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+	bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+	bp->bp_path_cost = bstp_calc_path_cost(bp);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_enable(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+	struct ifnet *ifp = bp->bp_ifp;
+
+	KASSERT(bp->bp_active == 0, ("already a bstp member"));
+
+	switch (ifp->if_type) {
+		case IFT_ETHER:	/* These can do spanning tree. */
+			break;
+		default:
+			/* Nothing else can. */
+			return (EINVAL);
+	}
+
+	BSTP_LOCK(bs);
+	LIST_INSERT_HEAD(&bs->bs_bplist, bp, bp_next);
+	bp->bp_active = 1;
+	bp->bp_flags |= BSTP_PORT_NEWINFO;
+	bstp_reinit(bs);
+	bstp_update_roles(bs, bp);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+void
+bstp_disable(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	KASSERT(bp->bp_active == 1, ("not a bstp member"));
+
+	BSTP_LOCK(bs);
+	bstp_disable_port(bs, bp);
+	LIST_REMOVE(bp, bp_next);
+	bp->bp_active = 0;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+}
+
+/*
+ * The bstp_port structure is about to be freed by the parent bridge.
+ */
+void
+bstp_destroy(struct bstp_port *bp)
+{
+	KASSERT(bp->bp_active == 0, ("port is still attached"));
+	taskqueue_drain(taskqueue_swi, &bp->bp_statetask);
+	taskqueue_drain(taskqueue_swi, &bp->bp_rtagetask);
 }
--- sys/net/net_osdep.h
+++ /dev/null
@@ -1,337 +0,0 @@
-/*	$FreeBSD: src/sys/net/net_osdep.h,v 1.15 2005/01/11 07:08:15 ume Exp $	*/
-/*	$KAME: net_osdep.h,v 1.80 2003/08/09 17:06:39 suz Exp $	*/
-
-/*-
- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the project nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * glue for kernel code programming differences.
- */
-
-/*
- * OS dependencies:
- * - ioctl
- *   FreeBSD 3 and later warn when sys/ioctl.h is included in a kernel source
- *   file.  For socket ioctl, we are suggested to use sys/sockio.h.
- *
- * - RTFREE()
- *   bsdi does not escape this macro using do-clause, so it is recommended
- *   to escape the macro explicitly.
- *   e.g.
- *	if (rt) {
- *		RTFREE(rt);
- *	}
- *
- * - whether the IPv4 input routine convert the byte order of some fileds
- *   of the IP header (x: convert to the host byte order, s: strip the header
- *   length for possible reassembly)
- *             ip_len ip_id ip_off
- * bsdi3:          xs     x      x
- * bsdi4:          xs            x
- * freebsd[23]:    xs     x      x 
- * freebsd4:       xs            x
- * NetBSD:          x            x
- * OpenBSD:        xs     x      x
- *
- * - ifa_ifwithaf()
- *   bsdi[34], netbsd, and openbsd define it in sys/net/if.c
- *   freebsd (all versions) does not have it.
- *  
- * - struct rt_addrinfo
- *   bsdi4, netbsd 1.5R and beyond: rti_addrs, rti_info[], rti_flags, rti_ifa,
- *	rti_ifp, and rti_rtm.
- *   others: rti_addrs and rti_info[] only.
- *
- * - ifa->ifa_rtrequest
- *   bsdi4, netbsd 1.5R and beyond: rt_addrinfo *
- *   others: sockaddr * (note that sys/net/route.c:rtrequest() has an unsafe
- *	typecast code, from 4.3BSD-reno)
- *
- * - side effects of rtrequest{,1}(RTM_DELETE)
- *	BSDI[34]: delete all cloned routes underneath the route.
- *	FreeBSD[234]: delete all protocol-cloned routes underneath the route.
- *		      note that cloned routes from an interface direct route
- *		      still remain.
- *	NetBSD: 1.5 have no side effects.  KAME/netbsd15, and post-1.5R, have
- *		the same effects as of BSDI.
- *	OpenBSD: have no side effects.  KAME/openbsd has the same effects as
- *		of BSDI (the change is not merged - yet).
- *
- * - privileged process
- *	NetBSD, FreeBSD 3
- *		struct proc *p;
- *		if (p && !suser(p->p_ucred, &p->p_acflag))
- *			privileged;
- *	FreeBSD 4
- *		struct proc *p;
- *		if (p && !suser(p))
- *			privileged;
- *	FreeBSD 5
- *		struct thread *td;
- *		if (suser(td))
- *			privileged;
- *	OpenBSD, BSDI [34], FreeBSD 2
- *		struct socket *so;
- *		if (so->so_state & SS_PRIV)
- *			privileged;
- * - foo_control
- *	NetBSD, FreeBSD 3
- *		needs to give struct proc * as argument
- *	OpenBSD, BSDI [34], FreeBSD 2
- *		do not need struct proc *
- *
- * - bpf:
- *	OpenBSD, NetBSD 1.5, BSDI [34]
- *		need caddr_t * (= if_bpf **) and struct ifnet *
- *	FreeBSD 2, FreeBSD 3, NetBSD post-1.5N
- *		need only struct ifnet * as argument
- *
- * - bpfattach:
- *	OpenBSD, NetBSD 1.5, BSDI [34]
- *		bpfattach(caddr_t *, struct ifnet *, u_int, u_int)
- *	FreeBSD, NetBSD 1.6:
- *		bpfattach(struct ifnet *, u_int, u_int)
- *
- * - bpf_mtap:
- *	OpenBSD, NetBSD, BSDI [34]
- *		bpf_mtap(caddr_t, struct mbuf *)
- *	FreeBSD
- *		bpf_mtap(struct ifnet *, struct mbuf *)
- *
- * - struct ifnet
- *			use queue.h?	member names	if name
- *			---		---		---
- *	FreeBSD 2	no		old standard	if_name+unit
- *	FreeBSD 3	yes		strange		if_name+unit
- *	FreeBSD 4	yes		strange		if_name+unit
- *	FreeBSD 5	yes		standard	if_xname
- *	OpenBSD		yes		standard	if_xname
- *	NetBSD		yes		standard	if_xname
- *	BSDI [34]	no		old standard	if_name+unit
- *
- * - usrreq
- *	NetBSD, OpenBSD, BSDI [34], FreeBSD 2
- *		single function with PRU_xx, arguments are mbuf
- *	FreeBSD 3
- *		separates functions, non-mbuf arguments
- *
- * - {set,get}sockopt
- *	NetBSD, OpenBSD, BSDI [34], FreeBSD 2
- *		manipulation based on mbuf
- *	FreeBSD 3
- *		non-mbuf manipulation using sooptcopy{in,out}()
- *
- * - timeout() and untimeout()
- *	NetBSD 1.4.x, OpenBSD, BSDI [34], FreeBSD 2
- *		timeout() is a void function
- *	FreeBSD 3
- *		timeout() is non-void, must keep returned value for untimeout()
- *		callout_xx is also available (sys/callout.h)
- *	NetBSD 1.5
- *		timeout() is obsoleted, use callout_xx (sys/callout.h)
- *	OpenBSD 2.8
- *		timeout_{add,set,del} is encouraged (sys/timeout.h)
- *
- * - kernel internal time structure
- *	FreeBSD 2, NetBSD, OpenBSD, BSD/OS
- *		mono_time.tv_u?sec, time.tv_u?sec
- *	FreeBSD [34]
- *		time_second
- *	if you need portability, #ifdef out FreeBSD[34], or use microtime(&tv)
- *	then touch tv.tv_sec (note: microtime is an expensive operation, so
- *	the use of mono_time is preferred).
- *
- * - sysctl
- *	NetBSD, OpenBSD
- *		foo_sysctl()
- *	BSDI [34]
- *		foo_sysctl() but with different style.  sysctl_int_arr() takes
- *		care of most of the cases.
- *	FreeBSD
- *		linker hack.  however, there are freebsd version differences
- *		(how wonderful!).
- *		on FreeBSD[23] function arg #define includes paren.
- *			int foo SYSCTL_HANDLER_ARGS;
- *		on FreeBSD4, function arg #define does not include paren.
- *			int foo(SYSCTL_HANDLER_ARGS);
- *		on some versions, forward reference to the tree is okay.
- *		on some versions, you need SYSCTL_DECL().  you need things
- *		like this.
- *			#ifdef SYSCTL_DECL
- *			SYSCTL_DECL(net_inet_ip6);
- *			#endif
- *		it is hard to share functions between freebsd and non-freebsd.
- *
- * - if_ioctl
- *	NetBSD, FreeBSD 3, BSDI [34]
- *		2nd argument is u_long cmd
- *	FreeBSD 2
- *		2nd argument is int cmd
- *
- * - if attach routines
- *	NetBSD
- *		void xxattach(int);
- *	FreeBSD 2, FreeBSD 3
- *		void xxattach(void *);
- *		PSEUDO_SET(xxattach, if_xx);
- *
- * - ovbcopy()
- *	in NetBSD 1.4 or later, ovbcopy() is not supplied in the kernel.
- *	we have updated sys/systm.h to include declaration.
- *
- * - splnet()
- *	NetBSD 1.4 or later, and OpenBSD, requires splsoftnet().
- *	other operating systems use splnet().
- *
- * - splimp()
- *	NetBSD 1.6: use splnet() in network, splvm() in vm.
- *	other operating systems: use splimp().
- *
- * - dtom()
- *	NEVER USE IT!
- *
- * - struct ifnet for loopback interface
- *	BSDI3: struct ifnet loif;
- *	BSDI4: struct ifnet *loifp;
- *	NetBSD, OpenBSD 2.8, FreeBSD2: struct ifnet loif[NLOOP];
- *	OpenBSD 2.9: struct ifnet *lo0ifp;
- *
- *	odd thing is that many of them refers loif as ifnet *loif,
- *	not loif[NLOOP], from outside of if_loop.c.
- *
- * - number of bpf pseudo devices
- *	others: bpfilter.h, NBPFILTER
- *	FreeBSD4: bpf.h, NBPF
- *	solution:
- *		#if defined(__FreeBSD__) && __FreeBSD__ >= 4
- *		#include "bpf.h"
- *		#define NBPFILTER	NBPF
- *		#else
- *		#include "bpfilter.h"
- *		#endif
- *
- * - protosw for IPv4 (sys/netinet)
- *	FreeBSD4: struct ipprotosw in netinet/ipprotosw.h
- *	others: struct protosw in sys/protosw.h
- *
- * - protosw in general.
- *	NetBSD 1.5 has extra member for ipfilter (netbsd-current dropped
- *	it so it went away in 1.6).
- *	NetBSD 1.5 requires PR_LISTEN flag bit with protocols that permit
- *	listen/accept (like tcp).
- *
- * - header files with defopt (opt_xx.h)
- *	FreeBSD3: opt_{inet,ipsec,ip6fw,altq}.h
- *	FreeBSD4: opt_{inet,inet6,ipsec,ip6fw,altq}.h
- *	NetBSD: opt_{inet,ipsec,altq}.h
- *	others: does not use defopt
- *
- * - IN_MULTICAST/IN_CLASS[A-D] macro.
- *	OpenBSD and NetBSD: net endian (kernel) or host endian (userland)
- *	others: always host endian
- *
- * - (m->m_flags & M_EXT) != 0 does *not* mean that the max data length of
- *   the mbuf == MCLBYTES.
- *
- * - sys/kern/uipc_mbuf.c:m_dup()
- *	freebsd[34]: copies the whole mbuf chain.
- *	netbsd: similar arg with m_copym().
- *	others: no m_dup().
- *
- * - ifa_refcnt (struct ifaddr) management (IFAREF/IFAFREE).
- *	NetBSD 1.5: always use IFAREF whenever reference gets added.
- *		always use IFAFREE whenever reference gets freed.
- *		IFAFREE frees ifaddr when ifa_refcnt reaches 0.
- *	others: do not increase refcnt for ifp->if_addrlist and in_ifaddr.
- *		use IFAFREE once when ifaddr is disconnected from
- *		ifp->if_addrlist and in_ifaddr.  IFAFREE frees ifaddr when
- *		ifa_refcnt goes negative.  in KAME environment, IFAREF is
- *		provided as a compatibility wrapper (use it instead of
- *		ifa_refcnt++ to reduce #ifdef).
- *
- * - ifnet.if_lastchange
- *	freebsd, bsdi, netbsd-current (jun 14 2001-),
- *	openbsd-current (jun 15 2001-): updated only when IFF_UP changes.
- *		(RFC1573 ifLastChange interpretation)
- *	netbsd151, openbsd29: updated whenever packets go through the interface.
- *		(4.4BSD interpretation)
- *
- * - kernel compilation options ("options HOGE" in kernel config file)
- *	freebsd4: sys/conf/options has to have mapping between option
- *		and a header file (opt_hoge.h).
- *	netbsd: by default, -DHOGE will go into
- *		sys/arch/foo/compile/BAR/Makefile.
- *		if you define mapping in sys/conf/files, you can create
- *		a header file like opt_hoge.h to help make dependencies.
- *	bsdi/openbsd: always use -DHOGE in Makefile.  there's no need/way
- *		to have opt_hoge.h.
- *
- *	therefore, opt_hoge.h is mandatory on freebsd4 only.
- *
- * - MALLOC() macro
- *	Use it only if the size of the allocation is constant.
- *	When we do NOT collect statistics about kernel memory usage, the result
- *	of macro expansion contains a large set of condition branches.  If the
- *	size is not constant, compilation optimization cannot be applied, and
- *	a bunch of the large branch will be embedded in the kernel code.
- *
- * - M_COPY_PKTHDR
- *	openbsd30, freebsd4(after 4.8): M_COPY_PKTHDR is deprecated.
- *		use M_MOVE_PKTHDR or M_DUP_PKTHDR, depending on how you want
- *		to handle m_tag.
- *	others: M_COPY_PKTHDR is available as usual.
- *
- * - M_READONLY() macro
- *	OpenBSD 3.0 and NetBSD 1.6 has it.
- *	FreeBSD 4.x uses M_WRITABLE() macro, which is opposite (NIH!).
- *	KAME tree has it for all platforms except FreeBSD 4.x.
- *
- * - TAILQ_EMPTY
- *	BSD/OS 4.x does not have this macro.
- */
-
-#ifndef __NET_NET_OSDEP_H_DEFINED_
-#define __NET_NET_OSDEP_H_DEFINED_
-#ifdef _KERNEL
-
-#define if_name(ifp)	((ifp)->if_xname)
-
-#define HAVE_NEW_BPFATTACH
-
-#define ifa_list	ifa_link
-#define if_addrlist	if_addrhead
-#define if_list		if_link
-
-#define HAVE_PPSRATECHECK
-
-/* sys/net/if.h */
-#define WITH_CONVERT_AND_STRIP_IP_LEN
-#define WITH_CONVERT_IP_OFF
-
-#endif /*_KERNEL*/
-#endif /*__NET_NET_OSDEP_H_DEFINED_ */
Index: bpf.h
===================================================================
RCS file: /home/cvs/src/sys/net/bpf.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/bpf.h -L sys/net/bpf.h -u -r1.1.1.1 -r1.2
--- sys/net/bpf.h
+++ sys/net/bpf.h
@@ -34,7 +34,7 @@
  *      @(#)bpf.h	8.1 (Berkeley) 6/10/93
  *	@(#)bpf.h	1.34 (LBL)     6/16/96
  *
- * $FreeBSD: src/sys/net/bpf.h,v 1.39.2.1 2005/09/29 23:48:04 csjp Exp $
+ * $FreeBSD: src/sys/net/bpf.h,v 1.47.2.1 2007/10/21 14:05:27 mlaier Exp $
  */
 
 #ifndef _NET_BPF_H_
@@ -109,12 +109,24 @@
 #define BIOCSRSIG	_IOW('B',115, u_int)
 #define BIOCGHDRCMPLT	_IOR('B',116, u_int)
 #define BIOCSHDRCMPLT	_IOW('B',117, u_int)
-#define BIOCGSEESENT	_IOR('B',118, u_int)
-#define BIOCSSEESENT	_IOW('B',119, u_int)
+#define BIOCGDIRECTION	_IOR('B',118, u_int)
+#define BIOCSDIRECTION	_IOW('B',119, u_int)
 #define	BIOCSDLT	_IOW('B',120, u_int)
 #define	BIOCGDLTLIST	_IOWR('B',121, struct bpf_dltlist)
 #define	BIOCLOCK	_IO('B', 122)
 #define	BIOCSETWF	_IOW('B',123, struct bpf_program)
+#define	BIOCFEEDBACK	_IOW('B',124, u_int)
+
+/* Obsolete */
+#define	BIOCGSEESENT	BIOCGDIRECTION
+#define	BIOCSSEESENT	BIOCSDIRECTION
+
+/* Packet directions */
+enum bpf_direction {
+	BPF_D_IN,	/* See incoming packets */
+	BPF_D_INOUT,	/* See incoming and outgoing packets */
+	BPF_D_OUT	/* See outgoing packets */
+};
 
 /*
  * Structure prepended to each packet.
@@ -523,6 +535,152 @@
 #define DLT_LINUX_LAPD		177
 
 /*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes at juniper.net>. 
+ * The DLT_ are used for prepending meta-information
+ * like interface index, interface name
+ * before standard Ethernet, PPP, Frelay & C-HDLC Frames
+ */
+#define DLT_JUNIPER_ETHER       178
+#define DLT_JUNIPER_PPP         179
+#define DLT_JUNIPER_FRELAY      180
+#define DLT_JUNIPER_CHDLC       181
+
+/*
+ * Multi Link Frame Relay (FRF.16)
+ */
+#define DLT_MFR                 182
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes at juniper.net>. 
+ * The DLT_ is used for internal communication with a
+ * voice Adapter Card (PIC)
+ */
+#define DLT_JUNIPER_VP          183
+
+/*
+ * Arinc 429 frames.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni at cacetech.com>.
+ * Every frame contains a 32bit A429 label.
+ * More documentation on Arinc 429 can be found at
+ * http://www.condoreng.com/support/downloads/tutorials/ARINCTutorial.pdf
+ */
+#define DLT_A429                184
+
+/*
+ * Arinc 653 Interpartition Communication messages.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni at cacetech.com>.
+ * Please refer to the A653-1 standard for more information.
+ */
+#define DLT_A653_ICM            185
+
+/*
+ * USB packets, beginning with a USB setup header; requested by
+ * Paolo Abeni <paolo.abeni at email.it>.
+ */
+#define DLT_USB			186
+
+/*
+ * Bluetooth HCI UART transport layer (part H:4); requested by
+ * Paolo Abeni.
+ */
+#define DLT_BLUETOOTH_HCI_H4	187
+
+/*
+ * IEEE 802.16 MAC Common Part Sublayer; requested by Maria Cruz
+ * <cruz_petagay at bah.com>.
+ */
+#define DLT_IEEE802_16_MAC_CPS	188
+
+/*
+ * USB packets, beginning with a Linux USB header; requested by
+ * Paolo Abeni <paolo.abeni at email.it>.
+ */
+#define DLT_USB_LINUX		189
+
+/*
+ * Controller Area Network (CAN) v. 2.0B packets.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni at cacetech.com>.
+ * Used to dump CAN packets coming from a CAN Vector board.
+ * More documentation on the CAN v2.0B frames can be found at
+ * http://www.can-cia.org/downloads/?269
+ */
+#define DLT_CAN20B              190
+
+/*
+ * IEEE 802.15.4, with address fields padded, as is done by Linux
+ * drivers; requested by Juergen Schimmer.
+ */
+#define DLT_IEEE802_15_4_LINUX	191
+
+/*
+ * Per Packet Information encapsulated packets.
+ * DLT_ requested by Gianluca Varenni <gianluca.varenni at cacetech.com>.
+ */
+#define DLT_PPI			192
+
+/*
+ * Header for 802.16 MAC Common Part Sublayer plus a radiotap radio header;
+ * requested by Charles Clancy.
+ */
+#define DLT_IEEE802_16_MAC_CPS_RADIO	193
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes at juniper.net>. 
+ * The DLT_ is used for internal communication with a
+ * integrated service module (ISM).
+ */
+#define DLT_JUNIPER_ISM         194
+
+/*
+ * IEEE 802.15.4, exactly as it appears in the spec (no padding, no
+ * nothing); requested by Mikko Saarnivala <mikko.saarnivala at sensinode.com>.
+ */
+#define DLT_IEEE802_15_4	195
+
+/*
+ * Various link-layer types, with a pseudo-header, for SITA
+ * (http://www.sita.aero/); requested by Fulko Hew (fulko.hew at gmail.com).
+ */
+#define DLT_SITA		196
+
+/*
+ * Various link-layer types, with a pseudo-header, for Endace DAG cards;
+ * encapsulates Endace ERF records.  Requested by Stephen Donnelly
+ * <stephen at endace.com>.
+ */
+#define DLT_ERF			197
+
+/*
+ * Special header prepended to Ethernet packets when capturing from a
+ * u10 Networks board.  Requested by Phil Mulholland
+ * <phil at u10networks.com>.
+ */
+#define DLT_RAIF1		198
+
+/*
+ * IPMB packet for IPMI, beginning with the I2C slave address, followed
+ * by the netFn and LUN, etc..  Requested by Chanthy Toeung
+ * <chanthy.toeung at ca.kontron.com>.
+ */
+#define DLT_IPMB		199
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes at juniper.net>. 
+ * The DLT_ is used for capturing data on a secure tunnel interface.
+ */
+#define DLT_JUNIPER_ST          200
+
+/*
+ * Bluetooth HCI UART transport layer (part H:4), with pseudo-header
+ * that includes direction information; requested by Paolo Abeni.
+ */
+#define DLT_BLUETOOTH_HCI_H4_WITH_PHDR	201
+
+/*
  * The instruction encodings.
  */
 /* instruction classes */
@@ -603,7 +761,18 @@
 };
 
 #ifdef _KERNEL
-struct bpf_if;
+/*
+ * Descriptor associated with each attached hardware interface.
+ */
+struct bpf_if {
+	LIST_ENTRY(bpf_if)	bif_next;	/* list of all interfaces */
+	LIST_HEAD(, bpf_d)	bif_dlist;	/* descriptor list */
+	u_int bif_dlt;				/* link layer type */
+	u_int bif_hdrlen;		/* length of header (with padding) */
+	struct ifnet *bif_ifp;		/* corresponding interface */
+	struct mtx	bif_mtx;	/* mutex for interface */
+};
+
 int	 bpf_validate(const struct bpf_insn *, int);
 void	 bpf_tap(struct bpf_if *, u_char *, u_int);
 void	 bpf_mtap(struct bpf_if *, struct mbuf *);
@@ -615,18 +784,27 @@
 void	 bpfilterattach(int);
 u_int	 bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
 
+static __inline int
+bpf_peers_present(struct bpf_if *bpf)
+{
+
+	if (!LIST_EMPTY(&bpf->bif_dlist))
+		return (1);
+	return (0);
+}
+
 #define	BPF_TAP(_ifp,_pkt,_pktlen) do {				\
-	if ((_ifp)->if_bpf)					\
+	if (bpf_peers_present((_ifp)->if_bpf))			\
 		bpf_tap((_ifp)->if_bpf, (_pkt), (_pktlen));	\
 } while (0)
 #define	BPF_MTAP(_ifp,_m) do {					\
-	if ((_ifp)->if_bpf) {					\
+	if (bpf_peers_present((_ifp)->if_bpf)) {		\
 		M_ASSERTVALID(_m);				\
 		bpf_mtap((_ifp)->if_bpf, (_m));			\
 	}							\
 } while (0)
 #define	BPF_MTAP2(_ifp,_data,_dlen,_m) do {			\
-	if ((_ifp)->if_bpf) {					\
+	if (bpf_peers_present((_ifp)->if_bpf)) {		\
 		M_ASSERTVALID(_m);				\
 		bpf_mtap2((_ifp)->if_bpf,(_data),(_dlen),(_m));	\
 	}							\
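
The BIOCSDIRECTION/BIOCGDIRECTION ioctls and the bpf_direction enum added
earlier in this header replace the old BIOCSSEESENT on/off flag. Below is a
sketch of how a userland capture program might use the new interface; it is not
part of the commit, and /dev/bpf0 and the interface name em0 are placeholders.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/bpf.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	u_int dir = BPF_D_IN;	/* capture incoming packets only */
	int fd;

	fd = open("/dev/bpf0", O_RDONLY);
	if (fd < 0)
		err(1, "open");

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) < 0)
		err(1, "BIOCSETIF");

	/* BIOCSDIRECTION supersedes the old BIOCSSEESENT flag */
	if (ioctl(fd, BIOCSDIRECTION, &dir) < 0)
		err(1, "BIOCSDIRECTION");

	printf("direction set to BPF_D_IN on em0\n");
	close(fd);
	return (0);
}
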
Index: if_ethersubr.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/net/if_ethersubr.c -L sys/net/if_ethersubr.c -u -r1.3 -r1.4
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -27,15 +27,13 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.193.2.10 2006/03/04 09:23:34 oleg Exp $
- * $MidnightBSD$
+ * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.236.2.1 2007/10/28 16:24:16 thompsa Exp $
  */
 
 #include "opt_atalk.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipx.h"
-#include "opt_bdg.h"
 #include "opt_mac.h"
 #include "opt_netgraph.h"
 #include "opt_carp.h"
@@ -43,7 +41,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
@@ -61,9 +58,9 @@
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/ethernet.h>
-#include <net/bridge.h>
 #include <net/if_bridgevar.h>
 #include <net/if_vlan_var.h>
+#include <net/pf_mtag.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
@@ -99,7 +96,8 @@
 extern u_char	at_org_code[3];
 extern u_char	aarp_org_code[3];
 #endif /* NETATALK */
-static int mtu_is_mru = 0;
+
+#include <security/mac/mac_framework.h>
 
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
@@ -110,18 +108,15 @@
 
 void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 
-/* bridge support */
-int do_bridge;
-bridge_in_t *bridge_in_ptr;
-bdg_forward_t *bdg_forward_ptr;
-bdgtakeifaces_t *bdgtakeifaces_ptr;
-struct bdg_softc *ifp2sc;
-
+/* if_bridge(4) support */
 struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); 
 int	(*bridge_output_p)(struct ifnet *, struct mbuf *, 
 		struct sockaddr *, struct rtentry *);
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
+/* if_lagg(4) support */
+struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); 
+
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -131,6 +126,9 @@
 /* XXX: should be in an arp support file, not here */
 MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
 
+#define	ETHER_IS_BROADCAST(addr) \
+	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
+
 #define senderr(e) do { error = (e); goto bad;} while (0)
 
 #if defined(INET) || defined(INET6)
@@ -154,6 +152,7 @@
 	int error, hdrcmplt = 0;
 	u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
 	struct ether_header *eh;
+	struct pf_mtag *t;
 	int loop_copy = 1;
 	int hlen;	/* link layer header length */
 
@@ -291,17 +290,9 @@
 		(void)memcpy(eh->ether_shost, esrc,
 			sizeof(eh->ether_shost));
 	else
-		(void)memcpy(eh->ether_shost, IFP2ENADDR(ifp),
+		(void)memcpy(eh->ether_shost, IF_LLADDR(ifp),
 			sizeof(eh->ether_shost));
 
-       /*
-	* Bridges require special output handling.
-	*/
-	if (ifp->if_bridge) {
-		BRIDGE_OUTPUT(ifp, m, error);
-		return (error);
-	}
-
 	/*
 	 * If a simplex interface, and the packet is being sent to our
 	 * Ethernet address or a broadcast address, loopback a copy.
@@ -312,7 +303,7 @@
 	 * reasons and compatibility with the original behavior.
 	 */
 	if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
-	    m_tag_find(m, PACKET_TAG_PF_ROUTED, NULL) == NULL) {
+	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
 		int csum_flags = 0;
 
 		if (m->m_pkthdr.csum_flags & CSUM_IP)
@@ -323,7 +314,19 @@
 		if (m->m_flags & M_BCAST) {
 			struct mbuf *n;
 
-			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
+			/*
+			 * Because if_simloop() modifies the packet, we need a
+			 * writable copy through m_dup() instead of a read-only
+			 * one as m_copy[m] would give us. The alternative would
+			 * be to modify if_simloop() to handle the read-only mbuf,
+			 * but performance-wise it is mostly equivalent (trading
+			 * extra data copying vs. extra locking).
+			 *
+			 * XXX This is a local workaround.  A number of less
+			 * often used kernel parts suffer from the same bug.
+			 * See PR kern/105943 for a proposed general solution.
+			 */
+			if ((n = m_dup(m, M_DONTWAIT)) != NULL) {
 				n->m_pkthdr.csum_flags |= csum_flags;
 				if (csum_flags & CSUM_DATA_VALID)
 					n->m_pkthdr.csum_data = 0xffff;
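
To make the trade-off in the comment above concrete: m_copym() may return a chain that shares read-only storage with the original, while m_dup() always produces a private, writable copy. A sketch, assuming the same ifp, m and dst variables as the surrounding function; if_simloop(9), m_copym(9) and m_dup(9) are the real interfaces, the rest is illustrative.

	struct mbuf *n;

	/*
	 * m_copym(m, 0, M_COPYALL, M_DONTWAIT) would be cheaper, but the
	 * copy may share read-only clusters with the original.  if_simloop()
	 * writes to the chain, so take a private copy instead.
	 */
	n = m_dup(m, M_DONTWAIT);
	if (n != NULL)
		(void)if_simloop(ifp, n, dst->sa_family, 0);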
@@ -340,6 +343,14 @@
 		}
 	}
 
+       /*
+	* Bridges require special output handling.
+	*/
+	if (ifp->if_bridge) {
+		BRIDGE_OUTPUT(ifp, m, error);
+		return (error);
+	}
+
 #ifdef DEV_CARP
 	if (ifp->if_carp &&
 	    (error = carp_output(ifp, m, dst, NULL)))
@@ -348,6 +359,8 @@
 
 	/* Handle ng_ether(4) processing, if any */
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
+		KASSERT(ng_ether_output_p != NULL,
+		    ("ng_ether_output_p is NULL"));
 		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
 bad:			if (m != NULL)
 				m_freem(m);
@@ -370,26 +383,10 @@
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
+	int error;
 #if defined(INET) || defined(INET6)
 	struct ip_fw *rule = ip_dn_claim_rule(m);
-#else
-	void *rule = NULL;
-#endif
-	int error;
 
-	if (rule == NULL && BDG_ACTIVE(ifp)) {
-		/*
-		 * Beware, the bridge code notices the null rcvif and
-		 * uses that identify that it's being called from
-		 * ether_output as opposd to ether_input.  Yech.
-		 */
-		m->m_pkthdr.rcvif = NULL;
-		m = bdg_forward_ptr(m, ifp);
-		if (m != NULL)
-			m_freem(m);
-		return (0);
-	}
-#if defined(INET) || defined(INET6)
 	if (IPFW_LOADED && ether_ipfw != 0) {
 		if (ether_ipfw_chk(&m, ifp, &rule, 0) == 0) {
 			if (m) {
@@ -413,9 +410,7 @@
 /*
  * ipfw processing for ethernet packets (in and out).
  * The second parameter is NULL from ether_demux, and ifp from
- * ether_output_frame. This section of code could be used from
- * bridge.c as well as long as we use some extra info
- * to distinguish that case from ether_output_frame();
+ * ether_output_frame.
  */
 int
 ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst,
@@ -517,6 +512,17 @@
 	struct ether_header *eh;
 	u_short etype;
 
+	if ((ifp->if_flags & IFF_UP) == 0) {
+		m_freem(m);
+		return;
+	}
+#ifdef DIAGNOSTIC
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
+		m_freem(m);
+		return;
+	}
+#endif
 	/*
 	 * Do consistency checks to verify assumptions
 	 * made by code past this point.
@@ -538,19 +544,6 @@
 	}
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
-	if (mtu_is_mru) {
-		if (m->m_pkthdr.len >
-			ETHER_MAX_FRAME(ifp, etype, m->m_flags & M_HASFCS)) {
-				if_printf(ifp, "discard oversize frame "
-					"(ether type %x flags %x len %u > max %lu(\n",
-					etype, m->m_flags, m->m_pkthdr.len,
-					ETHER_MAX_FRAME(ifp, etype,
-						m->m_flags & M_HASFCS));
-			ifp->if_ierrors++;
-			m_freem(m);
-			return;
-		}
-	}
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		ifp->if_ierrors++;
@@ -564,6 +557,14 @@
 	}
 #endif
 
+	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+		if (ETHER_IS_BROADCAST(eh->ether_dhost))
+			m->m_flags |= M_BCAST;
+		else
+			m->m_flags |= M_MCAST;
+		ifp->if_imcasts++;
+	}
+
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
@@ -575,52 +576,119 @@
 	/*
 	 * Give bpf a chance at the packet.
 	 */
-	BPF_MTAP(ifp, m);
+	ETHER_BPF_MTAP(ifp, m);
+
+	/*
+	 * If the CRC is still on the packet, trim it off. We do this once
+	 * and once only in case we are re-entered. Nothing else on the
+	 * Ethernet receive path expects to see the FCS.
+	 */
+	if (m->m_flags & M_HASFCS) {
+		m_adj(m, -ETHER_CRC_LEN);
+		m->m_flags &= ~M_HASFCS;
+	}
 
+	ifp->if_ibytes += m->m_pkthdr.len;
+
+	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
-		/*
-		 * Interface marked for monitoring; discard packet.
-		 */
 		m_freem(m);
 		return;
 	}
 
-	/* If the CRC is still on the packet, trim it off. */
-	if (m->m_flags & M_HASFCS) {
-		m_adj(m, -ETHER_CRC_LEN);
-		m->m_flags &= ~M_HASFCS;
+	/* Handle input from a lagg(4) port */
+	if (ifp->if_type == IFT_IEEE8023ADLAG) {
+		KASSERT(lagg_input_p != NULL,
+		    ("%s: if_lagg not loaded!", __func__));
+		m = (*lagg_input_p)(ifp, m);
+		if (m != NULL)
+			ifp = m->m_pkthdr.rcvif;
+		else 
+			return;
 	}
 
-	ifp->if_ibytes += m->m_pkthdr.len;
+	/*
+	 * If the hardware did not process an 802.1Q tag, do this now,
+	 * to allow 802.1P priority frames to be passed to the main input
+	 * path correctly.
+	 * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
+	 */
+	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
+		struct ether_vlan_header *evl;
 
-	/* Handle ng_ether(4) processing, if any */
+		if (m->m_len < sizeof(*evl) &&
+		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
+#ifdef DIAGNOSTIC
+			if_printf(ifp, "cannot pullup VLAN header\n");
+#endif
+			ifp->if_ierrors++;
+			m_freem(m);
+			return;
+		}
+
+		evl = mtod(m, struct ether_vlan_header *);
+		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
+		m->m_flags |= M_VLANTAG;
+
+		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
+		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
+		m_adj(m, ETHER_VLAN_ENCAP_LEN);
+	}
+
+	/* Allow ng_ether(4) to claim this frame. */
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
+		KASSERT(ng_ether_input_p != NULL,
+		    ("%s: ng_ether_input_p is NULL", __func__));
+		m->m_flags &= ~M_PROMISC;
 		(*ng_ether_input_p)(ifp, &m);
 		if (m == NULL)
 			return;
 	}
 
 	/*
-	 * Tap the packet off here for a bridge.  bridge_input()
-	 * will return NULL if it has consumed the packet, otherwise
-	 * it gets processed as normal.  Note that bridge_input()
-	 * will always return the original packet if we need to
-	 * process it locally.
+	 * Allow if_bridge(4) to claim this frame.
+	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
+	 * and the frame should be delivered locally.
 	 */
-	if (ifp->if_bridge) {
+	if (ifp->if_bridge != NULL) {
+		m->m_flags &= ~M_PROMISC;
 		BRIDGE_INPUT(ifp, m);
 		if (m == NULL)
 			return;
 	}
 
-	/* Check for bridging mode */
-	if (BDG_ACTIVE(ifp) )
-		if ((m = bridge_in_ptr(ifp, m)) == NULL)
-			return;
+#ifdef DEV_CARP
+	/*
+	 * Clear M_PROMISC on frame so that carp(4) will see it when the
+	 * mbuf flows up to Layer 3.
+	 * FreeBSD's implementation of carp(4) uses the inprotosw
+	 * to dispatch IPPROTO_CARP. carp(4) also allocates its own
+	 * Ethernet addresses of the form 00:00:5e:00:01:xx, which
+	 * is outside the scope of the M_PROMISC test below.
+	 * TODO: Maintain a hash table of ethernet addresses other than
+	 * ether_dhost which may be active on this ifp.
+	 */
+	if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost)) {
+		m->m_flags &= ~M_PROMISC;
+	} else
+#endif
+	{
+		/*
+		 * If the frame received was not for our MAC address, set the
+		 * M_PROMISC flag on the mbuf chain. The frame may need to
+		 * be seen by the rest of the Ethernet input path in case of
+		 * re-entry (e.g. bridge, vlan, netgraph) but should not be
+		 * seen by upper protocol layers.
+		 */
+		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
+		    bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
+			m->m_flags |= M_PROMISC;
+	}
 
 	/* First chunk of an mbuf contains good entropy */
 	if (harvest.ethernet)
 		random_harvest(m, 16, 3, 0, RANDOM_NET);
+
 	ether_demux(ifp, m);
 }
 
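
The software decapsulation above leaves the full 16-bit tag control information in m_pkthdr.ether_vtag; later consumers split it with the EVL_* helper macros rather than re-parsing the header. A small illustration (the variable names are only examples):

	uint16_t tci = m->m_pkthdr.ether_vtag;
	uint16_t vid = EVL_VLANOFTAG(tci);	/* low 12 bits: VLAN ID */
	uint8_t prio = EVL_PRIOFTAG(tci);	/* top 3 bits: 802.1p priority */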
@@ -636,138 +704,70 @@
 #if defined(NETATALK)
 	struct llc *l;
 #endif
-#if defined(INET) || defined(INET6)
-	struct ip_fw *rule = ip_dn_claim_rule(m);
-#endif
 
-	KASSERT(ifp != NULL, ("ether_demux: NULL interface pointer"));
-
-	eh = mtod(m, struct ether_header *);
-	ether_type = ntohs(eh->ether_type);
+	KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
 
 #if defined(INET) || defined(INET6)
-	if (rule)	/* packet was already bridged */
-		goto post_stats;
-#endif
-
-	if (!(BDG_ACTIVE(ifp)) && !(ifp->if_bridge) &&
-	    !((ether_type == ETHERTYPE_VLAN || m->m_flags & M_VLANTAG) &&
-	    ifp->if_nvlans > 0)) {
-#ifdef DEV_CARP
-		/*
-		 * XXX: Okay, we need to call carp_forus() and - if it is for
-		 * us jump over code that does the normal check
-		 * "IFP2ENADDR(ifp) == ether_dhost". The check sequence is a bit
-		 * different from OpenBSD, so we jump over as few code as
-		 * possible, to catch _all_ sanity checks. This needs
-		 * evaluation, to see if the carp ether_dhost values break any
-		 * of these checks!
-		 */
-		if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost))
-			goto pre_stats;
-#endif
-		/*
-		 * Discard packet if upper layers shouldn't see it because it
-		 * was unicast to a different Ethernet address. If the driver
-		 * is working properly, then this situation can only happen
-		 * when the interface is in promiscuous mode.
-		 *
-		 * If VLANs are active, and this packet has a VLAN tag, do
-		 * not drop it here but pass it on to the VLAN layer, to
-		 * give them a chance to consider it as well (e. g. in case
-		 * bridging is only active on a VLAN).  They will drop it if
-		 * it's undesired.
-		 */
-		if ((ifp->if_flags & IFF_PROMISC) != 0
-		    && !ETHER_IS_MULTICAST(eh->ether_dhost)
-		    && bcmp(eh->ether_dhost,
-		      IFP2ENADDR(ifp), ETHER_ADDR_LEN) != 0
-		    && (ifp->if_flags & IFF_PPROMISC) == 0) {
-			    m_freem(m);
-			    return;
-		}
-	}
-
-#ifdef DEV_CARP
-pre_stats:
-#endif
-	/* Discard packet if interface is not up */
-	if ((ifp->if_flags & IFF_UP) == 0) {
-		m_freem(m);
-		return;
-	}
-	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
-		if (bcmp(etherbroadcastaddr, eh->ether_dhost,
-		    sizeof(etherbroadcastaddr)) == 0)
-			m->m_flags |= M_BCAST;
-		else
-			m->m_flags |= M_MCAST;
-	}
-	if (m->m_flags & (M_BCAST|M_MCAST))
-		ifp->if_imcasts++;
+	/*
+	 * Allow dummynet and/or ipfw to claim the frame.
+	 * Do not do this for PROMISC frames in case we are re-entered.
+	 */
+	if (IPFW_LOADED && ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) {
+		struct ip_fw *rule = ip_dn_claim_rule(m);
 
-#if defined(INET) || defined(INET6)
-post_stats:
-	if (IPFW_LOADED && ether_ipfw != 0) {
 		if (ether_ipfw_chk(&m, NULL, &rule, 0) == 0) {
 			if (m)
-				m_freem(m);
-			return;
+				m_freem(m);	/* dropped; free mbuf chain */
+			return;			/* consumed */
 		}
 	}
 #endif
+	eh = mtod(m, struct ether_header *);
+	ether_type = ntohs(eh->ether_type);
 
 	/*
-	 * Check to see if the device performed the VLAN decapsulation and
-	 * provided us with the tag.
+	 * If this frame has a VLAN tag other than 0, call vlan_input()
+	 * if its module is loaded. Otherwise, drop.
 	 */
-	if (m->m_flags & M_VLANTAG) {
-		/*
-		 * If no VLANs are configured, drop.
-		 */
-		if (ifp->if_nvlans == 0) {
+	if ((m->m_flags & M_VLANTAG) &&
+	    EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
+		if (ifp->if_vlantrunk == NULL) {
 			ifp->if_noproto++;
 			m_freem(m);
 			return;
 		}
-		/*
-		 * vlan_input() will either recursively call ether_input()
-		 * or drop the packet.
-		 */
-		KASSERT(vlan_input_p != NULL,("ether_input: VLAN not loaded!"));
+		KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
+		    __func__));
+		/* Clear before possibly re-entering ether_input(). */
+		m->m_flags &= ~M_PROMISC;
 		(*vlan_input_p)(ifp, m);
 		return;
 	}
 
 	/*
-	 * Handle protocols that expect to have the Ethernet header
-	 * (and possibly FCS) intact.
+	 * Pass promiscuously received frames to the upper layer if the user
+	 * requested this by setting IFF_PPROMISC. Otherwise, drop them.
 	 */
-	switch (ether_type) {
-	case ETHERTYPE_VLAN:
-		if (ifp->if_nvlans != 0) {
-			KASSERT(vlan_input_p,("ether_input: VLAN not loaded!"));
-			(*vlan_input_p)(ifp, m);
-		} else {
-			ifp->if_noproto++;
-			m_freem(m);
-		}
+	if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
+		m_freem(m);
 		return;
 	}
 
-	/* Strip off Ethernet header. */
+	/*
+	 * Reset layer specific mbuf flags to avoid confusing upper layers.
+	 * Strip off Ethernet header.
+	 */
+	m->m_flags &= ~M_VLANTAG;
+	m->m_flags &= ~(M_PROTOFLAGS);
 	m_adj(m, ETHER_HDR_LEN);
 
-	/* If the CRC is still on the packet, trim it off. */
-	if (m->m_flags & M_HASFCS) {
-		m_adj(m, -ETHER_CRC_LEN);
-		m->m_flags &= ~M_HASFCS;
-	}
-
+	/*
+	 * Dispatch frame to upper layer.
+	 */
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
-		if (ip_fastforward(m))
+		if ((m = ip_fastforward(m)) == NULL)
 			return;
 		isr = NETISR_IP;
 		break;
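
Note the changed ip_fastforward() contract used above: the function now owns the mbuf and returns NULL when it has forwarded or dropped the packet, or returns the (possibly modified) mbuf when the slow path should continue. Schematically, the caller's side looks like this; the netisr_dispatch() call is outside the hunk shown and is included only to round out the picture.

	m = ip_fastforward(m);
	if (m == NULL)
		return;				/* consumed by the fast path */
	netisr_dispatch(NETISR_IP, m);		/* continue via normal IP input */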
@@ -841,6 +841,8 @@
 	 * otherwise dispose of it.
 	 */
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
+		KASSERT(ng_ether_input_orphan_p != NULL,
+		    ("ng_ether_input_orphan_p is NULL"));
 		/*
 		 * Put back the ethernet header so netgraph has a
 		 * consistent view of inbound packets.
@@ -872,7 +874,7 @@
  * Perform common duties while attaching to interface list
  */
 void
-ether_ifattach(struct ifnet *ifp, const u_int8_t *llc)
+ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
 {
 	int i;
 	struct ifaddr *ifa;
@@ -889,32 +891,24 @@
 		ifp->if_baudrate = IF_Mbps(10);		/* just a default */
 	ifp->if_broadcastaddr = etherbroadcastaddr;
 
-	ifa = ifaddr_byindex(ifp->if_index);
+	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ETHER;
 	sdl->sdl_alen = ifp->if_addrlen;
-	bcopy(llc, LLADDR(sdl), ifp->if_addrlen);
-	/*
-	 * XXX: This doesn't belong here; we do it until
-	 * XXX:  all drivers are cleaned up
-	 */
-	if (llc != IFP2ENADDR(ifp))
-		bcopy(llc, IFP2ENADDR(ifp), ifp->if_addrlen);
+	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 	if (ng_ether_attach_p != NULL)
 		(*ng_ether_attach_p)(ifp);
-	if (BDG_LOADED)
-		bdgtakeifaces_ptr();
 
 	/* Announce Ethernet MAC address if non-zero. */
 	for (i = 0; i < ifp->if_addrlen; i++)
-		if (llc[i] != 0)
+		if (lla[i] != 0)
 			break; 
 	if (i != ifp->if_addrlen)
-		if_printf(ifp, "Ethernet address: %6D\n", llc, ":");
-	if (debug_mpsafenet && (ifp->if_flags & IFF_NEEDSGIANT) != 0)
+		if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
+	if (ifp->if_flags & IFF_NEEDSGIANT)
 		if_printf(ifp, "if_start running deferred for Giant\n");
 }
 
@@ -924,19 +918,18 @@
 void
 ether_ifdetach(struct ifnet *ifp)
 {
-	if (IFP2AC(ifp)->ac_netgraph != NULL)
+	if (IFP2AC(ifp)->ac_netgraph != NULL) {
+		KASSERT(ng_ether_detach_p != NULL,
+		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
+	}
 
 	bpfdetach(ifp);
 	if_detach(ifp);
-	if (BDG_LOADED)
-		bdgtakeifaces_ptr();
 }
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
-SYSCTL_INT(_net_link_ether, OID_AUTO, MTUisMRU, CTLFLAG_RW,
-	    &mtu_is_mru,0,"Allow MTU to limit recieved packet size");
 #if defined(INET) || defined(INET6)
 SYSCTL_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
 	    &ether_ipfw,0,"Pass ether pkts through firewall");
@@ -1016,7 +1009,7 @@
 }
 
 int
-ether_ioctl(struct ifnet *ifp, int command, caddr_t data)
+ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifaddr *ifa = (struct ifaddr *) data;
 	struct ifreq *ifr = (struct ifreq *) data;
@@ -1044,10 +1037,10 @@
 			if (ipx_nullhost(*ina))
 				ina->x_host =
 				    *(union ipx_host *)
-				    IFP2ENADDR(ifp);
+				    IF_LLADDR(ifp);
 			else {
 				bcopy((caddr_t) ina->x_host.c_host,
-				      (caddr_t) IFP2ENADDR(ifp),
+				      (caddr_t) IF_LLADDR(ifp),
 				      ETHER_ADDR_LEN);
 			}
 
@@ -1069,7 +1062,7 @@
 			struct sockaddr *sa;
 
 			sa = (struct sockaddr *) & ifr->ifr_data;
-			bcopy(IFP2ENADDR(ifp),
+			bcopy(IF_LLADDR(ifp),
 			      (caddr_t) sa->sa_data, ETHER_ADDR_LEN);
 		}
 		break;
@@ -1216,5 +1209,73 @@
 	0
 };
 
+void
+ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
+{
+	struct ether_vlan_header vlan;
+	struct mbuf mv, mb;
+
+	KASSERT((m->m_flags & M_VLANTAG) != 0,
+	    ("%s: vlan information not present", __func__));
+	KASSERT(m->m_len >= sizeof(struct ether_header),
+	    ("%s: mbuf not large enough for header", __func__));
+	bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
+	vlan.evl_proto = vlan.evl_encap_proto;
+	vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
+	vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
+	m->m_len -= sizeof(struct ether_header);
+	m->m_data += sizeof(struct ether_header);
+	/*
+	 * If a data link has been supplied by the caller, then we will need to
+	 * re-create a stack allocated mbuf chain with the following structure:
+	 *
+	 * (1) mbuf #1 will contain the supplied data link
+	 * (2) mbuf #2 will contain the vlan header
+	 * (3) mbuf #3 will contain the original mbuf's packet data
+	 *
+	 * Otherwise, submit the packet and vlan header via bpf_mtap2().
+	 */
+	if (data != NULL) {
+		mv.m_next = m;
+		mv.m_data = (caddr_t)&vlan;
+		mv.m_len = sizeof(vlan);
+		mb.m_next = &mv;
+		mb.m_data = data;
+		mb.m_len = dlen;
+		bpf_mtap(bp, &mb);
+	} else
+		bpf_mtap2(bp, &vlan, sizeof(vlan), m);
+	m->m_len += sizeof(struct ether_header);
+	m->m_data -= sizeof(struct ether_header);
+}
+
+struct mbuf *
+ether_vlanencap(struct mbuf *m, uint16_t tag)
+{
+	struct ether_vlan_header *evl;
+
+	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
+	if (m == NULL)
+		return (NULL);
+	/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
+
+	if (m->m_len < sizeof(*evl)) {
+		m = m_pullup(m, sizeof(*evl));
+		if (m == NULL)
+			return (NULL);
+	}
+
+	/*
+	 * Transform the Ethernet header into an Ethernet header
+	 * with 802.1Q encapsulation.
+	 */
+	evl = mtod(m, struct ether_vlan_header *);
+	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
+	    (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
+	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
+	evl->evl_tag = htons(tag);
+	return (m);
+}
+
 DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(ether, 1);
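
The new ether_vlanencap() helper centralizes software 802.1Q tag insertion for paths where the hardware cannot tag on transmit. A hedged usage sketch; the surrounding transmit routine and the if_oerrors accounting are illustrative, while ether_vlanencap() and the flags are as defined above.

	if ((m->m_flags & M_VLANTAG) != 0 &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
		if (m == NULL) {
			/* chain already freed by M_PREPEND/m_pullup failure */
			ifp->if_oerrors++;
			return;
		}
		m->m_flags &= ~M_VLANTAG;
	}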
Index: if_gre.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_gre.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_gre.c -L sys/net/if_gre.c -u -r1.1.1.2 -r1.2
--- sys/net/if_gre.c
+++ sys/net/if_gre.c
@@ -1,5 +1,5 @@
 /*	$NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
-/*	 $FreeBSD: src/sys/net/if_gre.c,v 1.32.2.5 2006/02/16 01:08:40 qingli Exp $ */
+/*	 $FreeBSD: src/sys/net/if_gre.c,v 1.46 2007/06/26 23:01:01 rwatson Exp $ */
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -57,6 +57,7 @@
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
+#include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
@@ -83,7 +84,6 @@
 
 #include <net/bpf.h>
 
-#include <net/net_osdep.h>
 #include <net/if_gre.h>
 
 /*
@@ -104,7 +104,7 @@
 
 struct gre_softc_head gre_softc_list;
 
-static int	gre_clone_create(struct if_clone *, int);
+static int	gre_clone_create(struct if_clone *, int, caddr_t);
 static void	gre_clone_destroy(struct ifnet *);
 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
@@ -171,9 +171,10 @@
 }
 
 static int
-gre_clone_create(ifc, unit)
+gre_clone_create(ifc, unit, params)
 	struct if_clone *ifc;
 	int unit;
+	caddr_t params;
 {
 	struct gre_softc *sc;
 
@@ -210,20 +211,6 @@
 }
 
 static void
-gre_destroy(struct gre_softc *sc)
-{
-
-#ifdef INET
-	if (sc->encap != NULL)
-		encap_detach(sc->encap);
-#endif
-	bpfdetach(GRE2IFP(sc));
-	if_detach(GRE2IFP(sc));
-	if_free(GRE2IFP(sc));
-	free(sc, M_GRE);
-}
-
-static void
 gre_clone_destroy(ifp)
 	struct ifnet *ifp;
 {
@@ -232,7 +219,15 @@
 	mtx_lock(&gre_mtx);
 	LIST_REMOVE(sc, sc_list);
 	mtx_unlock(&gre_mtx);
-	gre_destroy(sc);
+
+#ifdef INET
+	if (sc->encap != NULL)
+		encap_detach(sc->encap);
+#endif
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+	free(sc, M_GRE);
 }
 
 /*
@@ -282,7 +277,7 @@
 		dst->sa_family = af;
 	}
 
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		af = dst->sa_family;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
@@ -300,7 +295,7 @@
 			 * RFC2004 specifies that fragmented diagrams shouldn't
 			 * be encapsulated.
 			 */
-			if ((ip->ip_off & IP_MF) != 0) {
+			if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
 				_IF_DROP(&ifp->if_snd);
 				m_freem(m);
 				error = EINVAL;    /* is there better errno? */
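
The widened check above matters because IP_MF alone only catches fragments that are followed by more fragments; the last fragment of a datagram has IP_MF clear but a non-zero offset. Testing both bits classifies any fragment, which is what the RFC 2004 restriction needs. Expressed as a predicate (the macro name is purely illustrative):

#define	GRE_IP_IS_FRAGMENT(ip)	(((ip)->ip_off & (IP_MF | IP_OFFMASK)) != 0)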
@@ -329,7 +324,7 @@
 
 			if ((m->m_data - msiz) < m->m_pktdat) {
 				/* need new mbuf */
-				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
+				MGETHDR(m0, M_DONTWAIT, MT_DATA);
 				if (m0 == NULL) {
 					_IF_DROP(&ifp->if_snd);
 					m_freem(m);
@@ -458,7 +453,11 @@
 	case SIOCSIFDSTADDR:
 		break;
 	case SIOCSIFFLAGS:
-		if ((error = suser(curthread)) != 0)
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
 			break;
 		if ((ifr->ifr_flags & IFF_LINK0) != 0)
 			sc->g_proto = IPPROTO_GRE;
@@ -470,7 +469,11 @@
 			sc->wccp_ver = WCCP_V1;
 		goto recompute;
 	case SIOCSIFMTU:
-		if ((error = suser(curthread)) != 0)
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
 			break;
 		if (ifr->ifr_mtu < 576) {
 			error = EINVAL;
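
Throughout this ioctl handler the blanket suser(curthread) test is replaced with priv_check(9) and a specific PRIV_NET_* privilege, so policies (MAC modules, jails) can grant or deny individual operations instead of all-or-nothing superuser. The converted idiom, lifted from the hunk above:

	/* Before: generic superuser check. */
	if ((error = suser(curthread)) != 0)
		break;

	/* After: request exactly the privilege this ioctl needs. */
	if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
		break;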
@@ -482,8 +485,36 @@
 		ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
 		break;
 	case SIOCADDMULTI:
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
+			break;
+		if (ifr == 0) {
+			error = EAFNOSUPPORT;
+			break;
+		}
+		switch (ifr->ifr_addr.sa_family) {
+#ifdef INET
+		case AF_INET:
+			break;
+#endif
+#ifdef INET6
+		case AF_INET6:
+			break;
+#endif
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
 	case SIOCDELMULTI:
-		if ((error = suser(curthread)) != 0)
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
 			break;
 		if (ifr == 0) {
 			error = EAFNOSUPPORT;
@@ -504,7 +535,11 @@
 		}
 		break;
 	case GRESPROTO:
-		if ((error = suser(curthread)) != 0)
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		sc->g_proto = ifr->ifr_flags;
 		switch (sc->g_proto) {
@@ -524,8 +559,9 @@
 		break;
 	case GRESADDRS:
 	case GRESADDRD:
-		if ((error = suser(curthread)) != 0)
-			break;
+		error = priv_check(curthread, PRIV_NET_GRE);
+		if (error)
+			return (error);
 		/*
 		 * set tunnel endpoints, compute a less specific route
 		 * to the remote end and mark if as up
@@ -590,7 +626,11 @@
 		ifr->ifr_addr = *sa;
 		break;
 	case SIOCSIFPHYADDR:
-		if ((error = suser(curthread)) != 0)
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
 			break;
 		if (aifr->ifra_addr.sin_family != AF_INET ||
 		    aifr->ifra_dstaddr.sin_family != AF_INET) {
@@ -606,7 +646,11 @@
 		sc->g_dst = aifr->ifra_dstaddr.sin_addr;
 		goto recompute;
 	case SIOCSLIFPHYADDR:
-		if ((error = suser(curthread)) != 0)
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
 			break;
 		if (lifr->addr.ss_family != AF_INET ||
 		    lifr->dstaddr.ss_family != AF_INET) {
@@ -623,7 +667,11 @@
 		    (satosin(&lifr->dstaddr))->sin_addr;
 		goto recompute;
 	case SIOCDIFPHYADDR:
-		if ((error = suser(curthread)) != 0)
+		/*
+		 * XXXRW: Isn't this priv_check() redundant to the ifnet
+		 * layer check?
+		 */
+		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
 			break;
 		sc->g_src.s_addr = INADDR_ANY;
 		sc->g_dst.s_addr = INADDR_ANY;
@@ -788,7 +836,6 @@
 static int
 gremodevent(module_t mod, int type, void *data)
 {
-	struct gre_softc *sc;
 
 	switch (type) {
 	case MOD_LOAD:
@@ -796,15 +843,6 @@
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(&gre_cloner);
-
-		mtx_lock(&gre_mtx);
-		while ((sc = LIST_FIRST(&gre_softc_list)) != NULL) {
-			LIST_REMOVE(sc, sc_list);
-			mtx_unlock(&gre_mtx);
-			gre_destroy(sc);
-			mtx_lock(&gre_mtx);
-		}
-		mtx_unlock(&gre_mtx);
 		mtx_destroy(&gre_mtx);
 		break;
 	default:
Index: if_gif.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_gif.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_gif.c -L sys/net/if_gif.c -u -r1.1.1.2 -r1.2
--- sys/net/if_gif.c
+++ sys/net/if_gif.c
@@ -1,4 +1,4 @@
-/*	$FreeBSD: src/sys/net/if_gif.c,v 1.52.2.4 2006/01/31 15:56:46 glebius Exp $	*/
+/*	$FreeBSD: src/sys/net/if_gif.c,v 1.66 2006/10/22 11:52:15 rwatson Exp $	*/
 /*	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $	*/
 
 /*-
@@ -37,7 +37,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
@@ -84,7 +83,7 @@
 #include <net/if_bridgevar.h>
 #include <net/if_gif.h>
 
-#include <net/net_osdep.h>
+#include <security/mac/mac_framework.h>
 
 #define GIFNAME		"gif"
 
@@ -101,7 +100,7 @@
 void	(*ng_gif_detach_p)(struct ifnet *ifp);
 
 static void	gif_start(struct ifnet *);
-static int	gif_clone_create(struct if_clone *, int);
+static int	gif_clone_create(struct if_clone *, int, caddr_t);
 static void	gif_clone_destroy(struct ifnet *);
 
 IFC_SIMPLE_DECLARE(gif, 0);
@@ -140,9 +139,10 @@
     &parallel_tunnels, 0, "Allow parallel tunnels?");
 
 static int
-gif_clone_create(ifc, unit)
+gif_clone_create(ifc, unit, params)
 	struct if_clone *ifc;
 	int unit;
+	caddr_t params;
 {
 	struct gif_softc *sc;
 
@@ -184,10 +184,15 @@
 }
 
 static void
-gif_destroy(struct gif_softc *sc)
+gif_clone_destroy(ifp)
+	struct ifnet *ifp;
 {
-	struct ifnet *ifp = GIF2IFP(sc);
 	int err;
+	struct gif_softc *sc = ifp->if_softc;
+
+	mtx_lock(&gif_mtx);
+	LIST_REMOVE(sc, gif_list);
+	mtx_unlock(&gif_mtx);
 
 	gif_delete_tunnel(ifp);
 #ifdef INET6
@@ -214,25 +219,12 @@
 	free(sc, M_GIF);
 }
 
-static void
-gif_clone_destroy(ifp)
-	struct ifnet *ifp;
-{
-	struct gif_softc *sc = ifp->if_softc;
-
-	mtx_lock(&gif_mtx);
-	LIST_REMOVE(sc, gif_list);
-	mtx_unlock(&gif_mtx);
-	gif_destroy(sc);
-}
-
 static int
 gifmodevent(mod, type, data)
 	module_t mod;
 	int type;
 	void *data;
 {
-	struct gif_softc *sc;
 
 	switch (type) {
 	case MOD_LOAD:
@@ -247,15 +239,6 @@
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(&gif_cloner);
-
-		mtx_lock(&gif_mtx);
-		while ((sc = LIST_FIRST(&gif_softc_list)) != NULL) {
-			LIST_REMOVE(sc, gif_list);
-			mtx_unlock(&gif_mtx);
-			gif_destroy(sc);
-			mtx_lock(&gif_mtx);
-		}
-		mtx_unlock(&gif_mtx);
 		mtx_destroy(&gif_mtx);
 #ifdef INET6
 		ip6_gif_hlim = 0;
@@ -429,6 +412,7 @@
 
 	if (!(ifp->if_flags & IFF_UP) ||
 	    sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
+		GIF_UNLOCK(sc);
 		m_freem(m);
 		error = ENETDOWN;
 		goto end;
@@ -441,9 +425,7 @@
 	}
 
 	af = dst->sa_family;
-	if (ifp->if_bpf) {
-		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
-	}
+	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	ifp->if_opackets++;	
 	ifp->if_obytes += m->m_pkthdr.len;
 
@@ -470,13 +452,12 @@
 	default:
 		m_freem(m);		
 		error = ENETDOWN;
-		goto end;
 	}
 
+	GIF_UNLOCK(sc);
   end:
 	if (error)
 		ifp->if_oerrors++;
-	GIF_UNLOCK(sc);
 	return (error);
 }
 
@@ -501,7 +482,7 @@
 	mac_create_mbuf_from_ifnet(ifp, m);
 #endif
 
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		u_int32_t af1 = af;
 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
 	}
@@ -935,13 +916,6 @@
 	if (odst)
 		free((caddr_t)odst, M_IFADDR);
 
-	if (sc->gif_psrc && sc->gif_pdst)
-		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-	else
-		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-
-	return 0;
-
  bad:
 	if (sc->gif_psrc && sc->gif_pdst)
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -972,9 +946,5 @@
 #ifdef INET6
 	(void)in6_gif_detach(sc);
 #endif
-
-	if (sc->gif_psrc && sc->gif_pdst)
-		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-	else
-		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 }
Index: bsd_comp.c
===================================================================
RCS file: /home/cvs/src/sys/net/bsd_comp.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/bsd_comp.c -L sys/net/bsd_comp.c -u -r1.1.1.1 -r1.2
--- sys/net/bsd_comp.c
+++ sys/net/bsd_comp.c
@@ -37,7 +37,7 @@
 /*
  * This version is for use with mbufs on BSD-derived systems.
  *
- * $FreeBSD: src/sys/net/bsd_comp.c,v 1.23 2005/07/05 23:39:49 rwatson Exp $
+ * $FreeBSD: src/sys/net/bsd_comp.c,v 1.24 2006/10/22 11:52:15 rwatson Exp $
  */
 
 #include "opt_mac.h"
@@ -45,7 +45,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
@@ -55,6 +54,8 @@
 #define PACKETPTR	struct mbuf *
 #include <net/ppp_comp.h>
 
+#include <security/mac/mac_framework.h>
+
 /*
  * PPP "BSD compress" compression
  *  The differences between this compression and the classic BSD LZW
Index: if_mib.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_mib.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_mib.c -L sys/net/if_mib.c -u -r1.1.1.2 -r1.2
--- sys/net/if_mib.c
+++ sys/net/if_mib.c
@@ -26,7 +26,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_mib.c,v 1.15.2.2 2006/01/26 07:46:28 harti Exp $
+ * $FreeBSD: src/sys/net/if_mib.c,v 1.18 2006/01/04 12:57:09 harti Exp $
  */
 
 #include <sys/param.h>
@@ -82,10 +82,10 @@
 		return EINVAL;
 
 	if (name[0] <= 0 || name[0] > if_index ||
-	    ifaddr_byindex(name[0]) == NULL)
+	    ifnet_byindex(name[0]) == NULL)
 		return ENOENT;
 
-	ifp = ifaddr_byindex(name[0])->ifa_ifp;
+	ifp = ifnet_byindex(name[0]);
 
 	switch(name[1]) {
 	default:
@@ -152,7 +152,6 @@
 			error = EPERM;
 		free(dbuf, M_TEMP);
 		return (error);
-		
 	}
 	return 0;
 }
--- /dev/null
+++ sys/net/if_edsc.c
@@ -0,0 +1,354 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following edsclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following edsclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EDSCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	From: @(#)if_loop.c	8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/net/if_edsc.c,v 1.3 2007/03/26 09:05:10 yar Exp $
+ */
+
+/*
+ * Discard interface driver for protocol testing and timing.
+ * Mimics an Ethernet device so that VLANs can be attached to it etc.
+ */
+
+#include <sys/param.h>		/* types, important constants */
+#include <sys/kernel.h>		/* SYSINIT for load-time initializations */
+#include <sys/malloc.h>		/* malloc(9) */
+#include <sys/module.h>		/* module(9) */
+#include <sys/mbuf.h>		/* mbuf(9) */
+#include <sys/socket.h>		/* struct ifreq */
+#include <sys/sockio.h>		/* socket ioctl's */
+/* #include <sys/systm.h> if you need printf(9) or other all-purpose globals */
+
+#include <net/bpf.h>		/* bpf(9) */
+#include <net/ethernet.h>	/* Ethernet related constants and types */
+#include <net/if.h>		/* basic part of ifnet(9) */
+#include <net/if_clone.h>	/* network interface cloning */
+#include <net/if_types.h>	/* IFT_ETHER and friends */
+#include <net/if_var.h>		/* kernel-only part of ifnet(9) */
+
+/*
+ * Software configuration of an interface specific to this device type.
+ */
+struct edsc_softc {
+	struct ifnet	*sc_ifp; /* ptr to generic interface configuration */
+
+	/*
+	 * A non-null driver can keep various things here, for instance,
+	 * the hardware revision, cached values of write-only registers, etc.
+	 */
+};
+
+/*
+ * Simple cloning methods.
+ * IFC_SIMPLE_DECLARE() expects precisely these names.
+ */
+static int	edsc_clone_create(struct if_clone *, int, caddr_t);
+static void	edsc_clone_destroy(struct ifnet *);
+
+/*
+ * Interface driver methods.
+ */
+static void	edsc_init(void *dummy);
+/* static void edsc_input(struct ifnet *ifp, struct mbuf *m); would be here */
+static int	edsc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
+static void	edsc_start(struct ifnet *ifp);
+
+/*
+ * We'll allocate softc instances from this.
+ */
+static		MALLOC_DEFINE(M_EDSC, "edsc", "Ethernet discard interface");
+
+/*
+ * Attach to the interface cloning framework under the name of "edsc".
+ * The second argument is the number of units to be created from
+ * the outset.  It's also the minimum number of units allowed.
+ * We don't want any units created as soon as the driver is loaded.
+ */
+IFC_SIMPLE_DECLARE(edsc, 0);
+
+/*
+ * Create an interface instance.
+ */
+static int
+edsc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct edsc_softc	*sc;
+	struct ifnet		*ifp;
+	static u_char		 eaddr[ETHER_ADDR_LEN];	/* 0:0:0:0:0:0 */
+
+	/*
+	 * Allocate soft and ifnet structures.  Link each to the other.
+	 */
+	sc = malloc(sizeof(struct edsc_softc), M_EDSC, M_WAITOK | M_ZERO);
+	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		free(sc, M_EDSC);
+		return (ENOSPC);
+	}
+
+	ifp->if_softc = sc;
+
+	/*
+	 * Get a name for this particular interface in its ifnet structure.
+	 */
+	if_initname(ifp, ifc->ifc_name, unit);
+
+	/*
+	 * Typical Ethernet interface flags: we can do broadcast and
+	 * multicast but can't hear our own broadcasts or multicasts.
+	 */
+	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
+
+	/*
+	 * We can pretend we have the whole set of hardware features
+	 * because we just discard all packets we get from the upper layer.
+	 * However, the features are disabled initially.  They can be
+	 * enabled via edsc_ioctl() when needed.
+	 */
+	ifp->if_capabilities =
+	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM |
+	    IFCAP_HWCSUM | IFCAP_TSO |
+	    IFCAP_JUMBO_MTU;
+	ifp->if_capenable = 0;
+
+	/*
+	 * Set the interface driver methods.
+	 */
+	ifp->if_init = edsc_init;
+	/* ifp->if_input = edsc_input; */
+	ifp->if_ioctl = edsc_ioctl;
+	ifp->if_start = edsc_start;
+
+	/*
+	 * Set the maximum output queue length from the global parameter.
+	 */
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+
+	/*
+	 * Do ifnet initializations common to all Ethernet drivers
+	 * and attach to the network interface framework.
+	 * TODO: Pick a non-zero link level address.
+	 */
+	ether_ifattach(ifp, eaddr);
+
+	/*
+	 * Now we can mark the interface as running, i.e., ready
+	 * for operation.
+	 */
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+
+	return (0);
+}
+
+/*
+ * Destroy an interface instance.
+ */
+static void
+edsc_clone_destroy(struct ifnet *ifp)
+{
+	struct edsc_softc	*sc = ifp->if_softc;
+
+	/*
+	 * Detach from the network interface framework.
+	 */
+	ether_ifdetach(ifp);
+
+	/*
+	 * Free memory occupied by ifnet and softc.
+	 */
+	if_free(ifp);
+	free(sc, M_EDSC);
+}
+
+/*
+ * This method is invoked from ether_ioctl() when it's time
+ * to bring up the hardware.
+ */
+static void
+edsc_init(void *dummy)
+{
+#if 0	/* what a hardware driver would do here... */
+	struct edsc_softc	*sc = (struct edsc_softc *)dummy;
+	struct ifnet		*ifp = sc->sc_ifp;
+
+	/* blah-blah-blah */
+#endif
+}
+
+/*
+ * Network interfaces are controlled via the ioctl(2) syscall.
+ */
+static int
+edsc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct ifreq		*ifr = (struct ifreq *)data;
+
+	switch (cmd) {
+	case SIOCSIFCAP:
+#if 1
+		/*
+		 * Just turn on any capabilities requested.
+		 * The generic ifioctl() function has already made sure
+		 * that they are supported, i.e., set in if_capabilities.
+		 */
+		ifp->if_capenable = ifr->ifr_reqcap;
+#else
+		/*
+		 * A h/w driver would need to analyze the requested
+		 * bits and program the hardware, e.g.:
+		 */
+		mask = ifp->if_capenable ^ ifr->ifr_reqcap;
+
+		if (mask & IFCAP_VLAN_HWTAGGING) {
+			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+
+			if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
+				/* blah-blah-blah */
+			else
+				/* etc-etc-etc */
+		}
+#endif
+		break;
+
+	default:
+		/*
+		 * Offload the rest onto the common Ethernet handler.
+		 */
+		return (ether_ioctl(ifp, cmd, data));
+	}
+
+	return (0);
+}
+
+/*
+ * Process the output queue.
+ */
+static void
+edsc_start(struct ifnet *ifp)
+{
+	struct mbuf		*m;
+
+	/*
+	 * A hardware interface driver can set IFF_DRV_OACTIVE
+	 * in ifp->if_drv_flags:
+	 *
+	 * ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+	 *
+	 * to prevent if_start from being invoked again while the
+	 * transmission is under way.  The flag is to protect the
+	 * device's transmitter, not the method itself.  The output
+	 * queue is locked and several threads can process it in
+	 * parallel safely, so the driver can use other means to
+	 * serialize access to the transmitter.
+	 *
+	 * If using IFF_DRV_OACTIVE, the driver should clear the flag
+	 * not earlier than the current transmission is complete, e.g.,
+	 * upon an interrupt from the device, not just before returning
+	 * from if_start.  This method merely starts the transmission,
+	 * which may proceed asynchronously.
+	 */
+
+	/*
+	 * We loop getting packets from the queue until it's empty.
+	 * A h/w driver would loop until the device can accept more
+	 * data into its buffer, or while there are free transmit
+	 * descriptors, or whatever.
+	 */
+	for (;;) {
+		/*
+		 * Try to dequeue one packet.  Stop if the queue is empty.
+		 * Use IF_DEQUEUE() here if ALTQ(9) support is unneeded.
+		 */
+		IFQ_DEQUEUE(&ifp->if_snd, m);
+		if (m == NULL)
+			break;
+
+		/*
+		 * Let bpf(9) at the packet.
+		 */
+		BPF_MTAP(ifp, m);
+
+		/*
+		 * Update the interface counters.
+		 */
+		ifp->if_obytes += m->m_pkthdr.len;
+		ifp->if_opackets++;
+
+		/*
+		 * Finally, just drop the packet.
+		 * TODO: Reply to ARP requests unless IFF_NOARP is set.
+		 */
+		m_freem(m);
+	}
+
+	/*
+	 * ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+	 * would be here only if the transmission were synchronous.
+	 */
+}
+
+/*
+ * This function provides handlers for module events, namely load and unload.
+ */
+static int
+edsc_modevent(module_t mod, int type, void *data)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		/*
+		 * Connect to the network interface cloning framework.
+		 */
+		if_clone_attach(&edsc_cloner);
+		break;
+
+	case MOD_UNLOAD:
+		/*
+		 * Disconnect from the cloning framework.
+		 * Existing interfaces will be disposed of properly.
+		 */
+		if_clone_detach(&edsc_cloner);
+		break;
+
+	default:
+		/*
+		 * There are other event types, but we don't handle them.
+		 * See module(9).
+		 */
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t edsc_mod = {
+	"if_edsc",			/* name */
+	edsc_modevent,			/* event handler */
+	NULL				/* additional data */
+};
+
+DECLARE_MODULE(if_edsc, edsc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
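
The #else branch of the SIOCSIFCAP case in edsc_ioctl() above is deliberately schematic and would not compile as written, since the placeholder comments stand in for statements. For a real hardware driver the same pattern, with hypothetical foo_*() helpers in place of the actual register programming, would look roughly like this:

	case SIOCSIFCAP: {
		int mask;

		mask = ifp->if_capenable ^ ifr->ifr_reqcap;
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
				foo_enable_vlan_hwtag(sc);	/* hypothetical */
			else
				foo_disable_vlan_hwtag(sc);	/* hypothetical */
		}
		break;
	}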
--- sys/net/bridge.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*-
- * Copyright (c) 1998-2002 Luigi Rizzo
- *
- * Work partly supported by: Cisco Systems, Inc. - NSITE lab, RTP, NC
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/net/bridge.h,v 1.15 2005/06/10 16:49:18 brooks Exp $
- */
-
-extern int do_bridge;
-
-/*
- * We need additional per-interface info for the bridge, which is
- * stored in a struct bdg_softc. The ifp2sc[] array provides a pointer
- * to this struct using the if_index as a mapping key.
- * bdg_softc has a backpointer to the struct ifnet, the bridge
- * flags, and a cluster (bridging occurs only between port of the
- * same cluster).
- */
-
-struct cluster_softc;	/* opaque here, defined in bridge.c */
-
-struct bdg_softc {
-    struct ifnet *ifp ;
-    int flags ;
-#define IFF_BDG_PROMISC 0x0001  /* set promisc mode on this if.	*/
-#define IFF_MUTE        0x0002  /* mute this if for bridging.   */
-#define IFF_USED        0x0004  /* use this if for bridging.    */
-    struct cluster_softc *cluster;
-} ;
-
-extern struct bdg_softc *ifp2sc;
-
-#define BDG_USED(ifp) (ifp2sc[ifp->if_index].flags & IFF_USED)
-/*
- * BDG_ACTIVE(ifp) does all checks to see if bridging is enabled, loaded,
- * and used on a given interface.
- */
-#define	BDG_ACTIVE(ifp)	(do_bridge && BDG_LOADED && BDG_USED(ifp))
-
-/*
- * The following constants are not legal ifnet pointers, and are used
- * as return values from the classifier, bridge_dst_lookup().
- * The same values are used as index in the statistics arrays,
- * with BDG_FORWARD replacing specifically forwarded packets.
- *
- * These constants are here because they are used in 'netstat'
- * to show bridge statistics.
- */
-#define BDG_BCAST	( (struct ifnet *)1 )
-#define BDG_MCAST	( (struct ifnet *)2 )
-#define BDG_LOCAL	( (struct ifnet *)3 )
-#define BDG_DROP	( (struct ifnet *)4 )
-#define BDG_UNKNOWN	( (struct ifnet *)5 )
-#define BDG_IN		( (struct ifnet *)7 )
-#define BDG_OUT		( (struct ifnet *)8 )
-#define BDG_FORWARD	( (struct ifnet *)9 )
-
-/*
- * Statistics are passed up with the sysctl interface, "netstat -p bdg"
- * reads them. PF_BDG defines the 'bridge' protocol family.
- */
-
-#define PF_BDG 3 /* XXX superhack */
-
-#define STAT_MAX (int)BDG_FORWARD
-struct bdg_port_stat {
-    char name[16];
-    u_long collisions;
-    u_long p_in[STAT_MAX+1];
-} ;
-
-/* XXX this should be made dynamic */
-#define BDG_MAX_PORTS 128
-struct bdg_stats {
-    struct bdg_port_stat s[BDG_MAX_PORTS];
-} ;
-
-
-#define BDG_STAT(ifp, type) bdg_stats.s[ifp->if_index].p_in[(uintptr_t)type]++ 
- 
-#ifdef _KERNEL
-typedef	struct mbuf *bridge_in_t(struct ifnet *, struct mbuf *);
-/* bdg_forward frees the mbuf if necessary, returning null */
-typedef	struct mbuf *bdg_forward_t(struct mbuf *, struct ifnet *);
-typedef	void bdgtakeifaces_t(void);
-extern	bridge_in_t *bridge_in_ptr;
-extern	bdg_forward_t *bdg_forward_ptr;
-extern	bdgtakeifaces_t *bdgtakeifaces_ptr;
-
-#define	BDG_LOADED	(bdgtakeifaces_ptr != NULL)
-#endif /* KERNEL */
Index: route.c
===================================================================
RCS file: /home/cvs/src/sys/net/route.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/route.c -L sys/net/route.c -u -r1.1.1.1 -r1.2
--- sys/net/route.c
+++ sys/net/route.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
- * $FreeBSD: src/sys/net/route.c,v 1.109.2.2 2005/09/26 14:59:12 glebius Exp $
+ * $FreeBSD: src/sys/net/route.c,v 1.120.2.1.2.1 2008/01/09 15:23:36 mux Exp $
  */
 
 #include "opt_inet.h"
@@ -137,7 +137,6 @@
 	int err = 0, msgtype = RTM_MISS;
 
 	newrt = NULL;
-	bzero(&info, sizeof(info));
 	/*
 	 * Look up the address in the table for that Address Family
 	 */
@@ -150,7 +149,7 @@
 	    (rn->rn_flags & RNF_ROOT) == 0) {
 		/*
 		 * If we find it and it's not the root node, then
-		 * get a refernce on the rtentry associated.
+		 * get a reference on the rtentry associated.
 		 */
 		newrt = rt = RNTORT(rn);
 		nflags = rt->rt_flags & ~ignflags;
@@ -183,12 +182,13 @@
 				goto miss;
 			}
 			/* Inform listeners of the new route. */
+			bzero(&info, sizeof(info));
 			info.rti_info[RTAX_DST] = rt_key(newrt);
 			info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
 			info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
 			if (newrt->rt_ifp != NULL) {
 				info.rti_info[RTAX_IFP] =
-				    ifaddr_byindex(newrt->rt_ifp->if_index)->ifa_addr;
+				    newrt->rt_ifp->if_addr->ifa_addr;
 				info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
 			}
 			rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
@@ -213,6 +213,7 @@
 			 * Authorities.
 			 * For a delete, this is not an error. (report == 0)
 			 */
+			bzero(&info, sizeof(info));
 			info.rti_info[RTAX_DST] = dst;
 			rt_missmsg(msgtype, &info, 0, err);
 		}
@@ -231,22 +232,21 @@
 {
 	struct radix_node_head *rnh;
 
-	/* XXX the NULL checks are probably useless */
-	if (rt == NULL)
-		panic("rtfree: NULL rt");
+	KASSERT(rt != NULL,("%s: NULL rt", __func__));
 	rnh = rt_tables[rt_key(rt)->sa_family];
-	if (rnh == NULL)
-		panic("rtfree: NULL rnh");
+	KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
 
 	RT_LOCK_ASSERT(rt);
 
 	/*
-	 * decrement the reference count by one and if it reaches 0,
-	 * and there is a close function defined, call the close function
+	 * The callers should use RTFREE_LOCKED() or RTFREE(), so
+	 * we should come here exactly with the last reference.
 	 */
 	RT_REMREF(rt);
-	if (rt->rt_refcnt > 0)
+	if (rt->rt_refcnt > 0) {
+		printf("%s: %p has %lu refs\n", __func__, rt, rt->rt_refcnt);
 		goto done;
+	}
 
 	/*
 	 * On last reference give the "close method" a chance
@@ -267,7 +267,7 @@
 	 */
 	if ((rt->rt_flags & RTF_UP) == 0) {
 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
-			panic ("rtfree 2");
+			panic("rtfree 2");
 		/*
 		 * the rtentry must have been removed from the routing table
 		 * so it is represented in rttrash.. remove that now.
@@ -435,6 +435,7 @@
 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
 {
 	register struct ifaddr *ifa;
+	int not_found = 0;
 
 	if ((flags & RTF_GATEWAY) == 0) {
 		/*
@@ -463,8 +464,26 @@
 		struct rtentry *rt = rtalloc1(gateway, 0, 0UL);
 		if (rt == NULL)
 			return (NULL);
+		/*
+		 * dismiss a gateway that is reachable only
+		 * through the default router
+		 */
+		switch (gateway->sa_family) {
+		case AF_INET:
+			if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
+				not_found = 1;
+			break;
+		case AF_INET6:
+			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
+				not_found = 1;
+			break;
+		default:
+			break;
+		}
 		RT_REMREF(rt);
 		RT_UNLOCK(rt);
+		if (not_found)
+			return (NULL);
 		if ((ifa = rt->rt_ifa) == NULL)
 			return (NULL);
 	}
@@ -499,6 +518,9 @@
 {
 	struct rt_addrinfo info;
 
+	if (dst->sa_len == 0)
+		return(EINVAL);
+
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_flags = flags;
 	info.rti_info[RTAX_DST] = dst;
@@ -858,7 +880,7 @@
 		}
 
 		/*
-		 * if this protocol has something to add to this then
+		 * If this protocol has something to add to this then
 		 * allow it to do that as well.
 		 */
 		if (ifa->ifa_rtrequest)
@@ -995,6 +1017,7 @@
 	struct radix_node_head *rnh = rt_tables[dst->sa_family];
 	int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
 
+again:
 	RT_LOCK_ASSERT(rt);
 
 	/*
@@ -1024,11 +1047,18 @@
 		RT_UNLOCK(rt);		/* XXX workaround LOR */
 		gwrt = rtalloc1(gate, 1, 0);
 		if (gwrt == rt) {
-			RT_LOCK_ASSERT(rt);
 			RT_REMREF(rt);
 			return (EADDRINUSE); /* failure */
 		}
-		RT_LOCK(rt);
+		/*
+		 * Try to reacquire the lock on rt, and if it fails,
+		 * clean state and restart from scratch.
+		 */
+		if (!RT_TRYLOCK(rt)) {
+			RTFREE_LOCKED(gwrt);
+			RT_LOCK(rt);
+			goto again;
+		}
 		/*
 		 * If there is already a gwroute, then drop it. If we
 		 * are asked to replace route with itself, then do
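
The RT_TRYLOCK()/goto again sequence above is the usual way to re-take a lock that had to be dropped around rtalloc1() when the lookup result (gwrt) comes back locked: blocking on rt while holding gwrt could invert the lock order, so the code try-locks rt and, on failure, releases gwrt, re-locks rt and re-validates from the top. Stripped of the routing specifics, the idiom looks like this sketch (all names are illustrative):

	again:
		mtx_lock(&a->mtx);
		/* ... validate and work on a ... */
		mtx_unlock(&a->mtx);		/* must drop across the lookup */
		b = lookup_returns_locked();	/* b->mtx is held on return */
		if (!mtx_trylock(&a->mtx)) {
			mtx_unlock(&b->mtx);	/* undo, then block safely */
			mtx_lock(&a->mtx);
			goto again;		/* state may have changed */
		}
		/* here both a->mtx and b->mtx are held */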
@@ -1137,6 +1167,9 @@
 		dst = ifa->ifa_addr;
 		netmask = ifa->ifa_netmask;
 	}
+	if (dst->sa_len == 0)
+		return(EINVAL);
+
 	/*
 	 * If it's a delete, check that if it exists, it's on the correct
 	 * interface or we might scrub a route to another ifa which would
@@ -1252,7 +1285,6 @@
 int
 rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
 {
-#define senderr(x) { error = x ; goto bad; }
 	struct rtentry *rt;
 	struct rtentry *rt0;
 	int error;
@@ -1269,7 +1301,7 @@
 			RT_REMREF(rt);
 			/* XXX what about if change? */
 		} else
-			senderr(EHOSTUNREACH);
+			return (EHOSTUNREACH);
 		rt0 = rt;
 	}
 	/* XXX BSD/OS checks dst->sa_family != AF_NS */
@@ -1279,16 +1311,24 @@
 		rt = rt->rt_gwroute;
 		RT_LOCK(rt);		/* NB: gwroute */
 		if ((rt->rt_flags & RTF_UP) == 0) {
-			rtfree(rt);	/* unlock gwroute */
+			RTFREE_LOCKED(rt);	/* unlock gwroute */
 			rt = rt0;
+			rt0->rt_gwroute = NULL;
 		lookup:
 			RT_UNLOCK(rt0);
 			rt = rtalloc1(rt->rt_gateway, 1, 0UL);
+			if (rt == rt0) {
+				RT_REMREF(rt0);
+				RT_UNLOCK(rt0);
+				return (ENETUNREACH);
+			}
 			RT_LOCK(rt0);
+			if (rt0->rt_gwroute != NULL)
+				RTFREE(rt0->rt_gwroute);
 			rt0->rt_gwroute = rt;
 			if (rt == NULL) {
 				RT_UNLOCK(rt0);
-				senderr(EHOSTUNREACH);
+				return (EHOSTUNREACH);
 			}
 		}
 		RT_UNLOCK(rt0);
@@ -1296,19 +1336,15 @@
 	/* XXX why are we inspecting rmx_expire? */
 	error = (rt->rt_flags & RTF_REJECT) &&
 		(rt->rt_rmx.rmx_expire == 0 ||
-			time_second < rt->rt_rmx.rmx_expire);
+			time_uptime < rt->rt_rmx.rmx_expire);
 	if (error) {
 		RT_UNLOCK(rt);
-		senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
+		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
 	}
 
 	*lrt = rt;
 	*lrt0 = rt0;
 	return (0);
-bad:
-	/* NB: lrt and lrt0 should not be interpreted if error is non-zero */
-	return (error);
-#undef senderr
 }
 
 /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
Index: if_arc.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_arc.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_arc.h -L sys/net/if_arc.h -u -r1.1.1.1 -r1.2
--- sys/net/if_arc.h
+++ sys/net/if_arc.h
@@ -1,5 +1,5 @@
 /*	$NetBSD: if_arc.h,v 1.13 1999/11/19 20:41:19 thorpej Exp $	*/
-/* $FreeBSD: src/sys/net/if_arc.h,v 1.8 2005/06/10 16:49:18 brooks Exp $ */
+/* $FreeBSD: src/sys/net/if_arc.h,v 1.9 2006/04/12 07:44:31 rwatson Exp $ */
 
 /*-
  * Copyright (c) 1982, 1986, 1993
@@ -130,7 +130,7 @@
 void	arc_ifattach(struct ifnet *, u_int8_t);
 void	arc_ifdetach(struct ifnet *);
 void	arc_storelladdr(struct ifnet *, u_int8_t);
-int	arc_isphds(int);
+int	arc_isphds(u_int8_t);
 void	arc_input(struct ifnet *, struct mbuf *);
 int	arc_output(struct ifnet *, struct mbuf *,
 	    struct sockaddr *, struct rtentry *);
Index: netisr.c
===================================================================
RCS file: /home/cvs/src/sys/net/netisr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/netisr.c -L sys/net/netisr.c -u -r1.1.1.1 -r1.2
--- sys/net/netisr.c
+++ sys/net/netisr.c
@@ -24,11 +24,10 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/netisr.c,v 1.15.2.2 2005/10/07 14:00:05 glebius Exp $
+ * $FreeBSD: src/sys/net/netisr.c,v 1.20 2007/08/06 14:26:00 rwatson Exp $
  */
 
 #include "opt_device_polling.h"
-#include "opt_net.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -56,24 +55,6 @@
 #include <net/if_var.h>
 #include <net/netisr.h>
 
-/* 
- * debug_mpsafenet controls network subsystem-wide use of the Giant lock,
- * from system calls down to interrupt handlers.  It can be changed only via
- * a tunable at boot, not at run-time, due to the complexity of unwinding.
- * The compiled default is set via a kernel option; right now, the default
- * unless otherwise specified is to run the network stack without Giant.
- */
-#ifdef NET_WITH_GIANT
-int	debug_mpsafenet = 0;
-#else
-int	debug_mpsafenet = 1;
-#endif
-int	debug_mpsafenet_toolatetotwiddle = 0;
-
-TUNABLE_INT("debug.mpsafenet", &debug_mpsafenet);
-SYSCTL_INT(_debug, OID_AUTO, mpsafenet, CTLFLAG_RD, &debug_mpsafenet, 0,
-    "Enable/disable MPSAFE network support");
-
 volatile unsigned int	netisr;	/* scheduling bits for network */
 
 struct netisr {
@@ -84,78 +65,6 @@
 
 static void *net_ih;
 
-/*
- * Not all network code is currently capable of running MPSAFE; however,
- * most of it is.  Since those sections that are not are generally optional
- * components not shipped with default kernels, we provide a basic way to
- * determine whether MPSAFE operation is permitted: based on a default of
- * yes, we permit non-MPSAFE components to use a registration call to
- * identify that they require Giant.  If the system is early in the boot
- * process still, then we change the debug_mpsafenet setting to choose a
- * non-MPSAFE execution mode (degraded).  If it's too late for that (since
- * the setting cannot be changed at run time), we generate a console warning
- * that the configuration may be unsafe.
- */
-static int mpsafe_warn_count;
-
-/*
- * Function call implementing registration of a non-MPSAFE network component.
- */
-void
-net_warn_not_mpsafe(const char *component)
-{
-
-	/*
-	 * If we're running with Giant over the network stack, there is no
-	 * problem.
-	 */
-	if (!debug_mpsafenet)
-		return;
-
-	/*
-	 * If it's not too late to change the MPSAFE setting for the network
-	 * stack, do so now.  This effectively suppresses warnings by
-	 * components registering later.
-	 */
-	if (!debug_mpsafenet_toolatetotwiddle) {
-		debug_mpsafenet = 0;
-		printf("WARNING: debug.mpsafenet forced to 0 as %s requires "
-		    "Giant\n", component);
-		return;
-	}
-
-	/*
-	 * We must run without Giant, so generate a console warning with some
-	 * information with what to do about it.  The system may be operating
-	 * unsafely, however.
-	 */
-	printf("WARNING: Network stack Giant-free, but %s requires Giant.\n",
-	    component);
-	if (mpsafe_warn_count == 0)
-		printf("    Consider adding 'options NET_WITH_GIANT' or "
-		    "setting debug.mpsafenet=0\n");
-	mpsafe_warn_count++;
-}
-
-/*
- * This sysinit is run after any pre-loaded or compiled-in components have
- * announced that they require Giant, but before any modules loaded at
- * run-time.
- */
-static void
-net_mpsafe_toolate(void *arg)
-{
-
-	debug_mpsafenet_toolatetotwiddle = 1;
-
-	if (!debug_mpsafenet)
-		printf("WARNING: MPSAFE network stack disabled, expect "
-		    "reduced performance.\n");
-}
-
-SYSINIT(net_mpsafe_toolate, SI_SUB_SETTINGS, SI_ORDER_ANY, net_mpsafe_toolate,
-    NULL);
-
 void
 legacy_setsoftnet(void)
 {
@@ -170,8 +79,6 @@
 	    ("bad isr %d", num));
 	netisrs[num].ni_handler = handler;
 	netisrs[num].ni_queue = inq;
-	if ((flags & NETISR_MPSAFE) && !debug_mpsafenet)
-		flags &= ~NETISR_MPSAFE;
 	netisrs[num].ni_flags = flags;
 }
 
@@ -201,7 +108,7 @@
 
 SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters");
 
-static int	netisr_direct = 0;
+static int	netisr_direct = 1;
 SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW, 
     &netisr_direct, 0, "enable direct dispatch");
 TUNABLE_INT("net.isr.direct", &netisr_direct);
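
The netisr.c change above flips net.isr.direct on by default. As a point of reference, here is a simplified sketch of the dispatch decision that tunable controls, using the ni_handler/ni_queue fields of the struct shown in the diff; netisr_dispatch_sketch() is an illustrative stand-in meant to live inside netisr.c next to those definitions, not the committed function:

#include <sys/param.h>
#include <sys/mbuf.h>
#include <net/if_var.h>		/* IF_HANDOFF() */
#include <net/netisr.h>		/* schednetisr() */

static void
netisr_dispatch_sketch(int num, struct mbuf *m)
{
	struct netisr *ni = &netisrs[num];

	if (netisr_direct && ni->ni_handler != NULL) {
		/*
		 * net.isr.direct=1: run the protocol handler in the
		 * caller's (typically the interrupt thread's) context.
		 */
		ni->ni_handler(m);
		return;
	}
	if (ni->ni_queue == NULL) {
		m_freem(m);
		return;
	}
	/* Deferred dispatch: enqueue and wake the netisr swi. */
	if (IF_HANDOFF(ni->ni_queue, m, NULL))
		schednetisr(num);
	/* On failure IF_HANDOFF drops and frees the mbuf itself. */
}
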
Index: if_bridgevar.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_bridgevar.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/if_bridgevar.h -L sys/net/if_bridgevar.h -u -r1.2 -r1.3
--- sys/net/if_bridgevar.h
+++ sys/net/if_bridgevar.h
@@ -67,7 +67,7 @@
  *
  * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
  *
- * $FreeBSD: src/sys/net/if_bridgevar.h,v 1.4.2.5 2006/03/09 08:21:19 thompsa Exp $
+ * $FreeBSD: src/sys/net/if_bridgevar.h,v 1.23.2.1 2007/11/07 19:21:41 thompsa Exp $
  */
 
 /*
@@ -108,6 +108,12 @@
 #define	BRDGSIFCOST		22	/* set if path cost (ifbreq) */
 #define	BRDGADDS		23	/* add bridge span member (ifbreq) */
 #define	BRDGDELS		24	/* delete bridge span member (ifbreq) */
+#define	BRDGPARAM		25	/* get bridge STP params (ifbropreq) */
+#define	BRDGGRTE		26	/* get cache drops (ifbrparam) */
+#define	BRDGGIFSSTP		27	/* get member STP params list
+					 * (ifbpstpconf) */
+#define	BRDGSPROTO		28	/* set protocol (ifbrparam) */
+#define	BRDGSTXHC		29	/* set tx hold count (ifbrparam) */
 
 /*
  * Generic bridge control request.
@@ -115,31 +121,41 @@
 struct ifbreq {
 	char		ifbr_ifsname[IFNAMSIZ];	/* member if name */
 	uint32_t	ifbr_ifsflags;		/* member if flags */
-	uint8_t		ifbr_state;		/* member if STP state */
-	uint8_t		ifbr_priority;		/* member if STP priority */
-	uint8_t		ifbr_path_cost;		/* member if STP cost */
+	uint32_t	ifbr_stpflags;		/* member if STP flags */
+	uint32_t	ifbr_path_cost;		/* member if STP cost */
 	uint8_t		ifbr_portno;		/* member if port number */
+	uint8_t		ifbr_priority;		/* member if STP priority */
+	uint8_t		ifbr_proto;		/* member if STP protocol */
+	uint8_t		ifbr_role;		/* member if STP role */
+	uint8_t		ifbr_state;		/* member if STP state */
+	uint8_t		pad[44];
 };
 
 /* BRDGGIFFLAGS, BRDGSIFFLAGS */
-#define	IFBIF_LEARNING		0x01	/* if can learn */
-#define	IFBIF_DISCOVER		0x02	/* if sends packets w/ unknown dest. */
-#define	IFBIF_STP		0x04	/* if participates in spanning tree */
-#define	IFBIF_SPAN		0x08	/* if is a span port */
-
-#define	IFBIFBITS	"\020\1LEARNING\2DISCOVER\3STP\4SPAN"
+#define	IFBIF_LEARNING		0x0001	/* if can learn */
+#define	IFBIF_DISCOVER		0x0002	/* if sends packets w/ unknown dest. */
+#define	IFBIF_STP		0x0004	/* if participates in spanning tree */
+#define	IFBIF_SPAN		0x0008	/* if is a span port */
+#define	IFBIF_STICKY		0x0010	/* if learned addresses stick */
+#define	IFBIF_BSTP_EDGE		0x0020	/* member stp edge port */
+#define	IFBIF_BSTP_AUTOEDGE	0x0040	/* member stp autoedge enabled */
+#define	IFBIF_BSTP_PTP		0x0080	/* member stp point to point */
+#define	IFBIF_BSTP_AUTOPTP	0x0100	/* member stp autoptp enabled */
+#define	IFBIF_BSTP_ADMEDGE	0x0200	/* member stp admin edge enabled */
+#define	IFBIF_BSTP_ADMCOST	0x0400	/* member stp admin path cost */
+#define	IFBIF_PRIVATE		0x0800	/* if is a private segment */
+
+#define	IFBIFBITS	"\020\001LEARNING\002DISCOVER\003STP\004SPAN" \
+			"\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \
+			"\011AUTOPTP"
+#define	IFBIFMASK	~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \
+			IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \
+			IFBIF_BSTP_ADMCOST)	/* not saved */
 
 /* BRDGFLUSH */
 #define	IFBF_FLUSHDYN		0x00	/* flush learned addresses only */
 #define	IFBF_FLUSHALL		0x01	/* flush all addresses */
 
-/* STP port states */
-#define	BSTP_IFSTATE_DISABLED	0
-#define	BSTP_IFSTATE_LISTENING	1
-#define	BSTP_IFSTATE_LEARNING	2
-#define	BSTP_IFSTATE_FORWARDING	3
-#define	BSTP_IFSTATE_BLOCKING	4
-
 /*
  * Interface list structure.
  */
@@ -161,13 +177,15 @@
 	unsigned long	ifba_expire;		/* address expire time */
 	uint8_t		ifba_flags;		/* address flags */
 	uint8_t		ifba_dst[ETHER_ADDR_LEN];/* destination address */
+	uint16_t	ifba_vlan;		/* vlan id */
 };
 
 #define	IFBAF_TYPEMASK	0x03	/* address type mask */
 #define	IFBAF_DYNAMIC	0x00	/* dynamically learned address */
 #define	IFBAF_STATIC	0x01	/* static address */
+#define	IFBAF_STICKY	0x02	/* sticky address */
 
-#define	IFBAFBITS	"\020\1STATIC"
+#define	IFBAFBITS	"\020\1STATIC\2STICKY"
 
 /*
  * Address list structure.
@@ -195,114 +213,57 @@
 #define	ifbrp_csize	ifbrp_ifbrpu.ifbrpu_int32	/* cache size */
 #define	ifbrp_ctime	ifbrp_ifbrpu.ifbrpu_int32	/* cache time (sec) */
 #define	ifbrp_prio	ifbrp_ifbrpu.ifbrpu_int16	/* bridge priority */
+#define	ifbrp_proto	ifbrp_ifbrpu.ifbrpu_int8	/* bridge protocol */
+#define	ifbrp_txhc	ifbrp_ifbrpu.ifbrpu_int8	/* bpdu tx holdcount */
 #define	ifbrp_hellotime	ifbrp_ifbrpu.ifbrpu_int8	/* hello time (sec) */
 #define	ifbrp_fwddelay	ifbrp_ifbrpu.ifbrpu_int8	/* fwd time (sec) */
 #define	ifbrp_maxage	ifbrp_ifbrpu.ifbrpu_int8	/* max age (sec) */
-
-#ifdef _KERNEL
+#define	ifbrp_cexceeded ifbrp_ifbrpu.ifbrpu_int32	/* # of cache dropped
+							 * addresses */
 /*
- * Timekeeping structure used in spanning tree code.
+ * Bridge current operational parameters structure.
  */
-struct bridge_timer {
-	uint16_t	active;
-	uint16_t	value;
-};
-
-struct bstp_config_unit {
-	uint64_t	cu_rootid;
-	uint64_t	cu_bridge_id;
-	uint32_t	cu_root_path_cost;
-	uint16_t	cu_message_age;
-	uint16_t	cu_max_age;
-	uint16_t	cu_hello_time;
-	uint16_t	cu_forward_delay;
-	uint16_t	cu_port_id;
-	uint8_t		cu_message_type;
-	uint8_t		cu_topology_change_acknowledgment;
-	uint8_t		cu_topology_change;
-};
-
-struct bstp_tcn_unit {
-	uint8_t		tu_message_type;
+struct ifbropreq {
+	uint8_t		ifbop_holdcount;
+	uint8_t		ifbop_maxage;
+	uint8_t		ifbop_hellotime;
+	uint8_t		ifbop_fwddelay;
+	uint8_t		ifbop_protocol;
+	uint16_t	ifbop_priority;
+	uint16_t	ifbop_root_port;
+	uint32_t	ifbop_root_path_cost;
+	uint64_t	ifbop_bridgeid;
+	uint64_t	ifbop_designated_root;
+	uint64_t	ifbop_designated_bridge;
+	struct timeval	ifbop_last_tc_time;
 };
 
 /*
- * Bridge interface list entry.
+ * Bridge member operational STP params structure.
  */
-struct bridge_iflist {
-	LIST_ENTRY(bridge_iflist) bif_next;
-	uint64_t		bif_designated_root;
-	uint64_t		bif_designated_bridge;
-	uint32_t		bif_path_cost;
-	uint32_t		bif_designated_cost;
-	struct bridge_timer	bif_hold_timer;
-	struct bridge_timer	bif_message_age_timer;
-	struct bridge_timer	bif_forward_delay_timer;
-	struct bstp_config_unit	bif_config_bpdu;
-	uint16_t		bif_port_id;
-	uint16_t		bif_designated_port;
-	uint8_t			bif_state;
-	uint8_t			bif_topology_change_acknowledge;
-	uint8_t			bif_config_pending;
-	uint8_t			bif_change_detection_enabled;
-	uint8_t			bif_priority;
-	struct ifnet		*bif_ifp;	/* member if */
-	uint32_t		bif_flags;	/* member if flags */
-	int			bif_mutecap;	/* member muted caps */
+struct ifbpstpreq {
+	uint8_t		ifbp_portno;		/* bp STP port number */
+	uint32_t	ifbp_fwd_trans;		/* bp STP fwd transitions */
+	uint32_t	ifbp_design_cost;	/* bp STP designated cost */
+	uint32_t	ifbp_design_port;	/* bp STP designated port */
+	uint64_t	ifbp_design_bridge;	/* bp STP designated bridge */
+	uint64_t	ifbp_design_root;	/* bp STP designated root */
 };
 
 /*
- * Bridge route node.
+ * Bridge STP ports list structure.
  */
-struct bridge_rtnode {
-	LIST_ENTRY(bridge_rtnode) brt_hash;	/* hash table linkage */
-	LIST_ENTRY(bridge_rtnode) brt_list;	/* list linkage */
-	struct ifnet		*brt_ifp;	/* destination if */
-	unsigned long		brt_expire;	/* expiration time */
-	uint8_t			brt_flags;	/* address flags */
-	uint8_t			brt_addr[ETHER_ADDR_LEN];
+struct ifbpstpconf {
+	uint32_t	ifbpstp_len;	/* buffer size */
+	union {
+		caddr_t	ifbpstpu_buf;
+		struct ifbpstpreq *ifbpstpu_req;
+	} ifbpstp_ifbpstpu;
+#define	ifbpstp_buf	ifbpstp_ifbpstpu.ifbpstpu_buf
+#define	ifbpstp_req	ifbpstp_ifbpstpu.ifbpstpu_req
 };
 
-/*
- * Software state for each bridge.
- */
-struct bridge_softc {
-	struct ifnet		*sc_ifp;	/* make this an interface */
-	LIST_ENTRY(bridge_softc) sc_list;
-	struct mtx		sc_mtx;
-	struct cv		sc_cv;
-	uint64_t		sc_designated_root;
-	uint64_t		sc_bridge_id;
-	struct bridge_iflist	*sc_root_port;
-	uint32_t		sc_root_path_cost;
-	uint16_t		sc_max_age;
-	uint16_t		sc_hello_time;
-	uint16_t		sc_forward_delay;
-	uint16_t		sc_bridge_max_age;
-	uint16_t		sc_bridge_hello_time;
-	uint16_t		sc_bridge_forward_delay;
-	uint16_t		sc_topology_change_time;
-	uint16_t		sc_hold_time;
-	uint16_t		sc_bridge_priority;
-	uint8_t			sc_topology_change_detected;
-	uint8_t			sc_topology_change;
-	struct bridge_timer	sc_hello_timer;
-	struct bridge_timer	sc_topology_change_timer;
-	struct bridge_timer	sc_tcn_timer;
-	uint32_t		sc_brtmax;	/* max # of addresses */
-	uint32_t		sc_brtcnt;	/* cur. # of addresses */
-	uint32_t		sc_brttimeout;	/* rt timeout in seconds */
-	struct callout		sc_brcallout;	/* bridge callout */
-	struct callout		sc_bstpcallout;	/* STP callout */
-	uint32_t		sc_iflist_ref;	/* refcount for sc_iflist */
-	uint32_t		sc_iflist_xcnt;	/* refcount for sc_iflist */
-	LIST_HEAD(, bridge_iflist) sc_iflist;	/* member interface list */
-	LIST_HEAD(, bridge_rtnode) *sc_rthash;	/* our forwarding table */
-	LIST_HEAD(, bridge_rtnode) sc_rtlist;	/* list version of above */
-	uint32_t		sc_rthash_key;	/* key for hash */
-	LIST_HEAD(, bridge_iflist) sc_spanlist;	/* span ports list */
-	struct bridge_timer	sc_link_timer;
-};
+#ifdef _KERNEL
 
 #define BRIDGE_LOCK_INIT(_sc)		do {			\
 	mtx_init(&(_sc)->sc_mtx, "if_bridge", NULL, MTX_DEF);	\
@@ -314,7 +275,6 @@
 } while (0)
 #define BRIDGE_LOCK(_sc)		mtx_lock(&(_sc)->sc_mtx)
 #define BRIDGE_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_mtx)
-#define BRIDGE_LOCKED(_sc)		mtx_owned(&(_sc)->sc_mtx)
 #define BRIDGE_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
 #define	BRIDGE_LOCK2REF(_sc, _err)	do {	\
 	mtx_assert(&(_sc)->sc_mtx, MA_OWNED);	\
@@ -356,21 +316,9 @@
 	_err = (*bridge_output_p)(_ifp, _m, NULL, NULL);	\
 } while (0)
 
-extern const uint8_t bstp_etheraddr[];
-
-void	bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *);
-void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
-
 extern	struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
 extern	int (*bridge_output_p)(struct ifnet *, struct mbuf *,
 		struct sockaddr *, struct rtentry *);
 extern	void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
-extern	void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
-
-void	bstp_initialization(struct bridge_softc *);
-void	bstp_linkstate(struct ifnet *, int);
-void	bstp_stop(struct bridge_softc *);
-struct mbuf *bstp_input(struct ifnet *, struct mbuf *);
-
 
 #endif /* _KERNEL */
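
The new BRDGPARAM/BRDGGIFSSTP commands and the ifbropreq/ifbpstpreq structures above are what ifconfig(8) talks to through the generic driver ioctls. A hedged userland sketch of fetching the bridge's operational STP parameters follows; it assumes the FreeBSD convention of passing the command number in a struct ifdrv via SIOCGDRVSPEC, and "bridge0" is just an example interface name:

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>		/* struct ifdrv */
#include <net/ethernet.h>
#include <net/if_bridgevar.h>
#include <err.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct ifdrv ifd;
	struct ifbropreq req;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");

	memset(&ifd, 0, sizeof(ifd));
	memset(&req, 0, sizeof(req));
	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
	ifd.ifd_cmd = BRDGPARAM;	/* new command defined above */
	ifd.ifd_len = sizeof(req);
	ifd.ifd_data = &req;

	if (ioctl(s, SIOCGDRVSPEC, &ifd) == -1)
		err(1, "BRDGPARAM");

	printf("priority %u, root port %u, root path cost %u, holdcount %u\n",
	    req.ifbop_priority, req.ifbop_root_port,
	    req.ifbop_root_path_cost, req.ifbop_holdcount);
	return (0);
}
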
--- sys/net/bridge.c
+++ /dev/null
@@ -1,1302 +0,0 @@
-/*-
- * Copyright (c) 1998-2002 Luigi Rizzo
- *
- * Work partly supported by: Cisco Systems, Inc. - NSITE lab, RTP, NC
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/net/bridge.c,v 1.93.2.1 2005/08/25 05:01:19 rwatson Exp $
- */
-
-/*
- * This code implements bridging in FreeBSD. It only acts on ethernet
- * interfaces, including VLANs (others are still usable for routing).
- * A FreeBSD host can implement multiple logical bridges, called
- * "clusters". Each cluster is made of a set of interfaces, and
- * identified by a "cluster-id" which is a number in the range 1..2^16-1.
- *
- * Bridging is enabled by the sysctl variable
- *	net.link.ether.bridge.enable
- * the grouping of interfaces into clusters is done with
- *	net.link.ether.bridge.config
- * containing a list of interfaces each optionally followed by
- * a colon and the cluster it belongs to (1 is the default).
- * Separators can be spaces, commas or tabs, e.g.
- *	net.link.ether.bridge.config="fxp0:2 fxp1:2 dc0 dc1:1"
- * Optionally bridged packets can be passed through the firewall,
- * this is controlled by the variable
- *	net.link.ether.bridge.ipfw
- *
- * For each cluster there is a descriptor (cluster_softc) storing
- * the following data structures:
- * - a hash table with the MAC address and destination interface for each
- *   known node. The table is indexed using a hash of the source address.
- * - an array with the MAC addresses of the interfaces used in the cluster.
- *
- * Input packets are tapped near the beginning of ether_input(), and
- * analysed by bridge_in(). Depending on the result, the packet
- * can be forwarded to one or more output interfaces using bdg_forward(),
- * and/or sent to the upper layer (e.g. in case of multicast).
- *
- * Output packets are intercepted near the end of ether_output().
- * The correct destination is selected by bridge_dst_lookup(),
- * and then forwarding is done by bdg_forward().
- *
- * The arp code is also modified to let a machine answer to requests
- * irrespective of the port the request came from.
- *
- * In case of loops in the bridging topology, the bridge detects this
- * event and temporarily mutes output bridging on one of the ports.
- * Periodically, interfaces are unmuted by bdg_timeout().
- * Muting is only implemented as a safety measure, and also as
- * a mechanism to support a user-space implementation of the spanning
- * tree algorithm.
- *
- * To build a bridging kernel, use the following option
- *    option BRIDGE
- * and then at runtime set the sysctl variable to enable bridging.
- *
- * Only one interface per cluster is supposed to have addresses set (but
- * there are no substantial problems if you set addresses for none or
- * for more than one interface).
- * Bridging will act before routing, but nothing prevents a machine
- * from doing both (modulo bugs in the implementation...).
- *
- * THINGS TO REMEMBER
- *  - bridging is incompatible with multicast routing on the same
- *    machine. There is not an easy fix to this.
- *  - be very careful when bridging VLANs
- *  - loop detection is still not very robust.
- */
-
-#include <sys/param.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
-#include <sys/protosw.h>
-#include <sys/systm.h>
-#include <sys/socket.h> /* for net/if.h */
-#include <sys/ctype.h>	/* string functions */
-#include <sys/kernel.h>
-#include <sys/module.h>
-#include <sys/sysctl.h>
-
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/if_arp.h>		/* for struct arpcom */
-#include <net/if_types.h>
-#include <net/if_var.h>
-#include <net/pfil.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#include <netinet/ip_var.h>
-
-#include <net/route.h>
-#include <netinet/ip_fw.h>
-#include <netinet/ip_dummynet.h>
-#include <net/bridge.h>
-
-/*--------------------*/
-
-#define	ETHER_ADDR_COPY(_dst,_src)	bcopy(_src, _dst, ETHER_ADDR_LEN)
-#define	ETHER_ADDR_EQ(_a1,_a2)		(bcmp(_a1, _a2, ETHER_ADDR_LEN) == 0)
-
-/*
- * For each cluster, source MAC addresses are stored into a hash
- * table which locates the port they reside on.
- */
-#define HASH_SIZE 8192	/* Table size, must be a power of 2 */
-
-typedef struct hash_table {		/* each entry.		*/
-    struct ifnet *	name;
-    u_char		etheraddr[ETHER_ADDR_LEN];
-    u_int16_t		used;		/* also, padding	*/
-} bdg_hash_table ;
-
-/*
- * The hash function applied to MAC addresses. Out of the 6 bytes,
- * the last ones tend to vary more. Since we are on a little endian machine,
- * we have to do some gimmick...
- */
-#define HASH_FN(addr)   (	\
-    ntohs( ((u_int16_t *)addr)[1] ^ ((u_int16_t *)addr)[2] ) & (HASH_SIZE -1))
-
-/*
- * This is the data structure where local addresses are stored.
- */
-struct bdg_addr {
-    u_char	etheraddr[ETHER_ADDR_LEN];
-    u_int16_t	_padding;
-};
-
-/*
- * The configuration of each cluster includes the cluster id, a pointer to
- * the hash table, and an array of local MAC addresses (of size "ports").
- */
-struct cluster_softc {
-    u_int16_t	cluster_id;
-    u_int16_t	ports;
-    bdg_hash_table *ht;
-    struct bdg_addr	*my_macs;	/* local MAC addresses */
-};
-
-
-extern struct protosw inetsw[];			/* from netinet/ip_input.c */
-
-static int n_clusters;				/* number of clusters */
-static struct cluster_softc *clusters;
-
-static struct mtx bdg_mtx;
-#define	BDG_LOCK_INIT()		mtx_init(&bdg_mtx, "bridge", NULL, MTX_DEF)
-#define	BDG_LOCK_DESTROY()	mtx_destroy(&bdg_mtx)
-#define	BDG_LOCK()		mtx_lock(&bdg_mtx)
-#define	BDG_UNLOCK()		mtx_unlock(&bdg_mtx)
-#define	BDG_LOCK_ASSERT()	mtx_assert(&bdg_mtx, MA_OWNED)
-
-#define BDG_MUTED(ifp) (ifp2sc[ifp->if_index].flags & IFF_MUTE)
-#define BDG_MUTE(ifp) ifp2sc[ifp->if_index].flags |= IFF_MUTE
-#define BDG_CLUSTER(ifp) (ifp2sc[ifp->if_index].cluster)
-
-#define BDG_SAMECLUSTER(ifp,src) \
-	(src == NULL || BDG_CLUSTER(ifp) == BDG_CLUSTER(src) )
-
-#ifdef __i386__
-#define BDG_MATCH(a,b) ( \
-    ((u_int16_t *)(a))[2] == ((u_int16_t *)(b))[2] && \
-    *((u_int32_t *)(a)) == *((u_int32_t *)(b)) )
-#define IS_ETHER_BROADCAST(a) ( \
-	*((u_int32_t *)(a)) == 0xffffffff && \
-	((u_int16_t *)(a))[2] == 0xffff )
-#else
-/* for machines that do not support unaligned access */
-#define	BDG_MATCH(a,b)		ETHER_ADDR_EQ(a,b)
-#define	IS_ETHER_BROADCAST(a)	ETHER_ADDR_EQ(a,"\377\377\377\377\377\377")
-#endif
-
-SYSCTL_DECL(_net_link_ether);
-SYSCTL_NODE(_net_link_ether, OID_AUTO, bridge, CTLFLAG_RD, 0,
-	"Bridge parameters");
-static char bridge_version[] = "031224";
-SYSCTL_STRING(_net_link_ether_bridge, OID_AUTO, version, CTLFLAG_RD,
-	bridge_version, 0, "software version");
-
-#define BRIDGE_DEBUG
-#ifdef BRIDGE_DEBUG
-int	bridge_debug = 0;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, debug, CTLFLAG_RW, &bridge_debug,
-	    0, "control debugging printfs");
-#define	DPRINTF(X)	if (bridge_debug) printf X
-#else
-#define	DPRINTF(X)
-#endif
-
-#ifdef BRIDGE_TIMING
-/*
- * For timing-related debugging, you can use the following macros.
- * remember, rdtsc() only works on Pentium-class machines
-
-    quad_t ticks;
-    DDB(ticks = rdtsc();)
-    ... interesting code ...
-    DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
-
- *
- */
-#define DDB(x)	x
-
-static int bdg_fw_avg;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, fw_avg, CTLFLAG_RW,
-	    &bdg_fw_avg, 0,"Cycle counter avg");
-static int bdg_fw_ticks;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, fw_ticks, CTLFLAG_RW,
-	    &bdg_fw_ticks, 0,"Cycle counter item");
-static int bdg_fw_count;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, fw_count, CTLFLAG_RW,
-	    &bdg_fw_count, 0,"Cycle counter count");
-#else
-#define	DDB(x)
-#endif
-
-static int bdginit(void);
-static void parse_bdg_cfg(void);
-static struct mbuf *bdg_forward(struct mbuf *, struct ifnet *);
-
-static int bdg_ipf;		/* IPFilter enabled in bridge */
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, ipf, CTLFLAG_RW,
-	    &bdg_ipf, 0,"Pass bridged pkts through IPFilter");
-static int bdg_ipfw;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, ipfw, CTLFLAG_RW,
-	    &bdg_ipfw,0,"Pass bridged pkts through firewall");
-
-static int bdg_copy;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, copy, CTLFLAG_RW,
-	&bdg_copy, 0, "Force packet copy in bdg_forward");
-
-int bdg_ipfw_drops;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, ipfw_drop,
-	CTLFLAG_RW, &bdg_ipfw_drops,0,"");
-int bdg_ipfw_colls;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, ipfw_collisions,
-	CTLFLAG_RW, &bdg_ipfw_colls,0,"");
-
-static int bdg_thru;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, packets, CTLFLAG_RW,
-	&bdg_thru, 0, "Packets through bridge");
-static int bdg_dropped;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, dropped, CTLFLAG_RW,
-	&bdg_dropped, 0, "Packets dropped in bdg_forward");
-static int bdg_predict;
-SYSCTL_INT(_net_link_ether_bridge, OID_AUTO, predict, CTLFLAG_RW,
-	&bdg_predict, 0, "Correctly predicted header location");
-
-#ifdef BRIDGE_DEBUG
-static char *bdg_dst_names[] = {
-	"BDG_NULL    ",
-	"BDG_BCAST   ",
-	"BDG_MCAST   ",
-	"BDG_LOCAL   ",
-	"BDG_DROP    ",
-	"BDG_UNKNOWN ",
-	"BDG_IN      ",
-	"BDG_OUT     ",
-	"BDG_FORWARD " };
-#endif /* BRIDGE_DEBUG */
-
-/*
- * System initialization
- */
-static struct bdg_stats bdg_stats ;
-SYSCTL_STRUCT(_net_link_ether_bridge, OID_AUTO, stats, CTLFLAG_RD,
-	&bdg_stats, bdg_stats, "bridge statistics");
-
-static struct callout bdg_callout;
-
-/*
- * Add an interface to a cluster, possibly creating a new entry in
- * the cluster table. This requires reallocation of the table and
- * updating pointers in ifp2sc.
- */
-static struct cluster_softc *
-add_cluster(u_int16_t cluster_id, struct ifnet *ifp)
-{
-    struct cluster_softc *c = NULL;
-    int i;
-
-    BDG_LOCK_ASSERT();
-
-    for (i = 0; i < n_clusters ; i++)
-	if (clusters[i].cluster_id == cluster_id)
-	    goto found;
-
-    /* Not found, need to reallocate */
-    c = malloc((1+n_clusters) * sizeof (*c), M_IFADDR, M_NOWAIT | M_ZERO);
-    if (c == NULL) {/* malloc failure */
-	printf("-- bridge: cannot add new cluster\n");
-	goto bad;
-    }
-    c[n_clusters].ht = (struct hash_table *)
-	    malloc(HASH_SIZE * sizeof(struct hash_table),
-		M_IFADDR, M_NOWAIT | M_ZERO);
-    if (c[n_clusters].ht == NULL) {
-	printf("-- bridge: cannot allocate hash table for new cluster\n");
-	goto bad;
-    }
-    c[n_clusters].my_macs = (struct bdg_addr *)
-	    malloc(BDG_MAX_PORTS * sizeof(struct bdg_addr),
-		M_IFADDR, M_NOWAIT | M_ZERO);
-    if (c[n_clusters].my_macs == NULL) {
-        printf("-- bridge: cannot allocate mac addr table for new cluster\n");
-	free(c[n_clusters].ht, M_IFADDR);
-	goto bad;
-    }
-
-    c[n_clusters].cluster_id = cluster_id;
-    c[n_clusters].ports = 0;
-    /*
-     * now copy old descriptors here
-     */
-    if (n_clusters > 0) {
-	for (i=0; i < n_clusters; i++)
-	    c[i] = clusters[i];
-	/*
-	 * and finally update pointers in ifp2sc
-	 */
-	for (i = 0 ; i < if_index && i < BDG_MAX_PORTS; i++)
-	    if (ifp2sc[i].cluster != NULL)
-		ifp2sc[i].cluster = c + (ifp2sc[i].cluster - clusters);
-	free(clusters, M_IFADDR);
-    }
-    clusters = c;
-    i = n_clusters;		/* index of cluster entry */
-    n_clusters++;
-found:
-    c = clusters + i;		/* the right cluster ... */
-    ETHER_ADDR_COPY(c->my_macs[c->ports].etheraddr, IFP2ENADDR(ifp));
-    c->ports++;
-    return c;
-bad:
-    if (c)
-	free(c, M_IFADDR);
-    return NULL;
-}
-
-
-/*
- * Turn off bridging, by clearing promisc mode on the interface,
- * marking the interface as unused, and clearing the name in the
- * stats entry.
- * Also dispose the hash tables associated with the clusters.
- */
-static void
-bridge_off(void)
-{
-    struct ifnet *ifp ;
-    int i;
-
-    BDG_LOCK_ASSERT();
-
-    DPRINTF(("%s: n_clusters %d\n", __func__, n_clusters));
-
-    IFNET_RLOCK();
-    TAILQ_FOREACH(ifp, &ifnet, if_link) {
-	struct bdg_softc *b;
-
-	if (ifp->if_index >= BDG_MAX_PORTS)
-	    continue;	/* make sure we do not go beyond the end */
-	b = &ifp2sc[ifp->if_index];
-
-	if ( b->flags & IFF_BDG_PROMISC ) {
-	    ifpromisc(ifp, 0);
-	    b->flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
-	    DPRINTF(("%s: %s promisc OFF if_flags 0x%x "
-		"bdg_flags 0x%x\n", __func__, ifp->if_xname,
-		ifp->if_flags, b->flags));
-	}
-	b->flags &= ~(IFF_USED) ;
-	b->cluster = NULL;
-	bdg_stats.s[ifp->if_index].name[0] = '\0';
-    }
-    IFNET_RUNLOCK();
-    /* flush_tables */
-
-    for (i=0; i < n_clusters; i++) {
-	free(clusters[i].ht, M_IFADDR);
-	free(clusters[i].my_macs, M_IFADDR);
-    }
-    if (clusters != NULL)
-	free(clusters, M_IFADDR);
-    clusters = NULL;
-    n_clusters =0;
-}
-
-/*
- * set promisc mode on the interfaces we use.
- */
-static void
-bridge_on(void)
-{
-    struct ifnet *ifp ;
-
-    BDG_LOCK_ASSERT();
-
-    IFNET_RLOCK();
-    TAILQ_FOREACH(ifp, &ifnet, if_link) {
-	struct bdg_softc *b = &ifp2sc[ifp->if_index];
-
-	if ( !(b->flags & IFF_USED) )
-	    continue ;
-	if ( !( ifp->if_flags & IFF_UP) ) {
-	    if_up(ifp);
-	}
-	if ( !(b->flags & IFF_BDG_PROMISC) ) {
-	    (void) ifpromisc(ifp, 1);
-	    b->flags |= IFF_BDG_PROMISC ;
-	    DPRINTF(("%s: %s promisc ON if_flags 0x%x bdg_flags 0x%x\n",
-		__func__, ifp->if_xname, ifp->if_flags, b->flags));
-	}
-	if (b->flags & IFF_MUTE) {
-	    DPRINTF(("%s: unmuting %s\n", __func__, ifp->if_xname));
-	    b->flags &= ~IFF_MUTE;
-	}
-    }
-    IFNET_RUNLOCK();
-}
-
-static char bridge_cfg[1024];		/* NB: in BSS so initialized to zero */
-
-/**
- * reconfigure bridge.
- * This is also done every time we attach or detach an interface.
- * Main use is to make sure that we do not bridge on some old
- * (ejected) device. So, it would be really useful to have a
- * pointer to the modified device as an argument. Without it, we
- * have to scan all interfaces.
- */
-static void
-reconfigure_bridge_locked(void)
-{
-    BDG_LOCK_ASSERT();
-
-    bridge_off();
-    if (do_bridge) {
-	if (if_index >= BDG_MAX_PORTS) {
-	    printf("-- sorry too many interfaces (%d, max is %d),"
-		" disabling bridging\n", if_index, BDG_MAX_PORTS);
-	    do_bridge = 0;
-	    return;
-	}
-	parse_bdg_cfg();
-	bridge_on();
-    }
-}
-
-static void
-reconfigure_bridge(void)
-{
-    BDG_LOCK();
-    reconfigure_bridge_locked();
-    BDG_UNLOCK();
-}
-
-/*
- * parse the config string, set IFF_USED, name and cluster_id
- * for all interfaces found.
- * The config string is a list of "if[:cluster]" with
- * a number of possible separators (see "sep"). In particular the
- * use of the space lets you set bridge_cfg with the output from
- * "ifconfig -l"
- */
-static void
-parse_bdg_cfg(void)
-{
-    char *p, *beg;
-    int l, cluster;
-    static const char *sep = ", \t";
-
-    BDG_LOCK_ASSERT();
-
-    for (p = bridge_cfg; *p ; p++) {
-	struct ifnet *ifp;
-	int found = 0;
-	char c;
-
-	if (index(sep, *p))	/* skip separators */
-	    continue ;
-	/* names are lowercase and digits */
-	for ( beg = p ; islower(*p) || isdigit(*p) ; p++ )
-	    ;
-	l = p - beg ;		/* length of name string */
-	if (l == 0)		/* invalid name */
-	    break ;
-	if ( *p != ':' )	/* no ':', assume default cluster 1 */
-	    cluster = 1 ;
-	else			/* fetch cluster */
-	    cluster = strtoul( p+1, &p, 10);
-	c = *p;
-	*p = '\0';
-	/*
-	 * now search in interface list for a matching name
-	 */
-	IFNET_RLOCK();		/* could sleep XXX */
-	TAILQ_FOREACH(ifp, &ifnet, if_link) {
-
-	    if (!strncmp(beg, ifp->if_xname, max(l, strlen(ifp->if_xname)))) {
-		struct bdg_softc *b = &ifp2sc[ifp->if_index];
-		if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) {
-		    printf("%s is not an ethernet, continue\n", ifp->if_xname);
-		    continue;
-		}
-		if (b->flags & IFF_USED) {
-		    printf("%s already used, skipping\n", ifp->if_xname);
-		    break;
-		}
-		b->cluster = add_cluster(htons(cluster), ifp);
-		b->flags |= IFF_USED ;
-		snprintf(bdg_stats.s[ifp->if_index].name,
-		    sizeof(bdg_stats.s[ifp->if_index].name),
-		    "%s:%d", ifp->if_xname, cluster);
-
-		DPRINTF(("%s: found %s next c %d\n", __func__,
-		    bdg_stats.s[ifp->if_index].name, c));
-		found = 1;
-		break ;
-	    }
-	}
-	IFNET_RUNLOCK();
-	if (!found)
-	    printf("interface %s Not found in bridge\n", beg);
-	*p = c;
-	if (c == '\0')
-	    break; /* no more */
-    }
-}
-
-/*
- * handler for net.link.ether.bridge
- */
-static int
-sysctl_bdg(SYSCTL_HANDLER_ARGS)
-{
-    int enable = do_bridge;
-    int error;
-
-    error = sysctl_handle_int(oidp, &enable, 0, req);
-    enable = (enable) ? 1 : 0;
-    BDG_LOCK();
-    if (enable != do_bridge) {
-	do_bridge = enable;
-	reconfigure_bridge_locked();
-    }
-    BDG_UNLOCK();
-    return error ;
-}
-SYSCTL_PROC(_net_link_ether_bridge, OID_AUTO, enable, CTLTYPE_INT|CTLFLAG_RW,
-	    &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
-
-/*
- * handler for net.link.ether.bridge_cfg
- */
-static int
-sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
-{
-    int error;
-    char *new_cfg;
-
-    new_cfg = malloc(sizeof(bridge_cfg), M_TEMP, M_WAITOK);
-    bcopy(bridge_cfg, new_cfg, sizeof(bridge_cfg));
-
-    error = sysctl_handle_string(oidp, new_cfg, oidp->oid_arg2, req);
-    if (error == 0) {
-        BDG_LOCK();
-	if (strcmp(new_cfg, bridge_cfg)) {
-	    bcopy(new_cfg, bridge_cfg, sizeof(bridge_cfg));
-	    reconfigure_bridge_locked();
-	}
-	BDG_UNLOCK();
-    }
-
-    free(new_cfg, M_TEMP);
-
-    return error;
-}
-SYSCTL_PROC(_net_link_ether_bridge, OID_AUTO, config, CTLTYPE_STRING|CTLFLAG_RW,
-	    &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
-	    "Bridge configuration");
-
-static int
-sysctl_refresh(SYSCTL_HANDLER_ARGS)
-{
-    if (req->newptr)
-	reconfigure_bridge();
-
-    return 0;
-}
-SYSCTL_PROC(_net_link_ether_bridge, OID_AUTO, refresh, CTLTYPE_INT|CTLFLAG_WR,
-	    NULL, 0, &sysctl_refresh, "I", "iface refresh");
-
-#ifndef BURN_BRIDGES
-#define SYSCTL_OID_COMPAT(parent, nbr, name, kind, a1, a2, handler, fmt, descr)\
-	static struct sysctl_oid sysctl__##parent##_##name##_compat = {	 \
-		&sysctl_##parent##_children, { 0 },			 \
-		nbr, kind, a1, a2, #name, handler, fmt, 0, descr };	 \
-	DATA_SET(sysctl_set, sysctl__##parent##_##name##_compat)
-#define SYSCTL_INT_COMPAT(parent, nbr, name, access, ptr, val, descr)	 \
-	SYSCTL_OID_COMPAT(parent, nbr, name, CTLTYPE_INT|(access),	 \
-		ptr, val, sysctl_handle_int, "I", descr)
-#define SYSCTL_STRUCT_COMPAT(parent, nbr, name, access, ptr, type, descr)\
-	SYSCTL_OID_COMPAT(parent, nbr, name, CTLTYPE_OPAQUE|(access),	 \
-		ptr, sizeof(struct type), sysctl_handle_opaque,		 \
-		"S," #type, descr)
-#define SYSCTL_PROC_COMPAT(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
-	SYSCTL_OID_COMPAT(parent, nbr, name, (access),			 \
-		ptr, arg, handler, fmt, descr)
-
-SYSCTL_INT_COMPAT(_net_link_ether, OID_AUTO, bridge_ipf, CTLFLAG_RW,
-	    &bdg_ipf, 0,"Pass bridged pkts through IPFilter");
-SYSCTL_INT_COMPAT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
-	    &bdg_ipfw,0,"Pass bridged pkts through firewall");
-SYSCTL_STRUCT_COMPAT(_net_link_ether, PF_BDG, bdgstats, CTLFLAG_RD,
-	&bdg_stats, bdg_stats, "bridge statistics");
-SYSCTL_PROC_COMPAT(_net_link_ether, OID_AUTO, bridge_cfg, 
-	    CTLTYPE_STRING|CTLFLAG_RW,
-	    &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
-	    "Bridge configuration");
-SYSCTL_PROC_COMPAT(_net_link_ether, OID_AUTO, bridge_refresh,
-	    CTLTYPE_INT|CTLFLAG_WR,
-	    NULL, 0, &sysctl_refresh, "I", "iface refresh");
-#endif
-
-static int bdg_loops;
-static int bdg_slowtimer = 0;
-static int bdg_age_index = 0;	/* index of table position to age */
-
-/*
- * called periodically to flush entries etc.
- */
-static void
-bdg_timeout(void *dummy)
-{
-    if (do_bridge) {
-	int l, i;
-
-	BDG_LOCK();
-	/*
-	 * age entries in the forwarding table.
-	 */
-	l = bdg_age_index + HASH_SIZE/4 ;
-	if (l > HASH_SIZE)
-	    l = HASH_SIZE;
-
-	for (i = 0; i < n_clusters; i++) {
-	    bdg_hash_table *bdg_table = clusters[i].ht;
-	    for (; bdg_age_index < l; bdg_age_index++)
-		if (bdg_table[bdg_age_index].used)
-		    bdg_table[bdg_age_index].used = 0;
-		else if (bdg_table[bdg_age_index].name) {
-		    DPRINTF(("%s: flushing stale entry %d\n",
-			__func__, bdg_age_index));
-		    bdg_table[bdg_age_index].name = NULL;
-		}
-	}
-	if (bdg_age_index >= HASH_SIZE)
-	    bdg_age_index = 0;
-
-	if (--bdg_slowtimer <= 0 ) {
-	    bdg_slowtimer = 5;
-
-	    bridge_on();	/* we just need unmute, really */
-	    bdg_loops = 0;
-	}
-	BDG_UNLOCK();
-    }
-    callout_reset(&bdg_callout, 2*hz, bdg_timeout, NULL);
-}
-
-/*
- * Find the right pkt destination:
- *	BDG_BCAST	is a broadcast
- *	BDG_MCAST	is a multicast
- *	BDG_LOCAL	is for a local address
- *	BDG_DROP	must be dropped
- *	other		ifp of the dest. interface (incl.self)
- *
- * We assume this is only called for interfaces for which bridging
- * is enabled, i.e. BDG_USED(ifp) is true.
- */
-static __inline struct ifnet *
-bridge_dst_lookup(struct ether_header *eh, struct cluster_softc *c)
-{
-    bdg_hash_table *bt;		/* pointer to entry in hash table */
-
-    BDG_LOCK_ASSERT();
-
-    if (ETHER_IS_MULTICAST(eh->ether_dhost))
-	return IS_ETHER_BROADCAST(eh->ether_dhost) ? BDG_BCAST : BDG_MCAST;
-    /*
-     * Lookup local addresses in case one matches.  We optimize
-     * for the common case of two interfaces.
-     */
-    KASSERT(c->ports != 0, ("lookup with no ports!"));
-    switch (c->ports) {
-	int i;
-    default:
-	for (i = c->ports-1; i > 1; i--) {
-	    if (ETHER_ADDR_EQ(c->my_macs[i].etheraddr, eh->ether_dhost))
-	        return BDG_LOCAL;
-	}
-	/* fall thru... */
-    case 2:
-	if (ETHER_ADDR_EQ(c->my_macs[1].etheraddr, eh->ether_dhost))
-	    return BDG_LOCAL;
-    case 1:
-	if (ETHER_ADDR_EQ(c->my_macs[0].etheraddr, eh->ether_dhost))
-	    return BDG_LOCAL;
-    }
-    /*
-     * Look for a possible destination in table
-     */
-    bt = &c->ht[HASH_FN(eh->ether_dhost)];
-    if (bt->name && ETHER_ADDR_EQ(bt->etheraddr, eh->ether_dhost))
-	return bt->name;
-    else
-	return BDG_UNKNOWN;
-}
-
-/**
- * bridge_in() is invoked to perform bridging decision on input packets.
- *
- * On Input:
- *   eh		Ethernet header of the incoming packet.
- *   ifp	interface the packet is coming from.
- *
- * On Return: destination of packet, one of
- *   BDG_BCAST	broadcast
- *   BDG_MCAST  multicast
- *   BDG_LOCAL  is only for a local address (do not forward)
- *   BDG_DROP   drop the packet
- *   ifp	ifp of the destination interface.
- *
- * Forwarding is not done directly to give a chance to some drivers
- * to fetch more of the packet, or simply drop it completely.
- */
-
-static struct mbuf *
-bridge_in(struct ifnet *ifp, struct mbuf *m)
-{
-    struct ether_header *eh;
-    struct ifnet *dst, *old;
-    bdg_hash_table *bt;			/* location in hash table */
-    int dropit = BDG_MUTED(ifp);
-    int index;
-
-    eh = mtod(m, struct ether_header *);
-
-    /*
-     * hash the source address
-     */
-    BDG_LOCK();
-    index = HASH_FN(eh->ether_shost);
-    bt = &BDG_CLUSTER(ifp)->ht[index];
-    bt->used = 1;
-    old = bt->name;
-    if (old) {				/* the entry is valid */
-	if (!ETHER_ADDR_EQ(eh->ether_shost, bt->etheraddr)) {
-	    bdg_ipfw_colls++;
-	    bt->name = NULL;		/* NB: will overwrite below */
-	} else if (old != ifp) {
-	    /*
-	     * Found a loop. Either a machine has moved, or there
-	     * is a misconfiguration/reconfiguration of the network.
-	     * First, do not forward this packet!
-	     * Record the relocation anyways; then, if loops persist,
-	     * suspect a reconfiguration and disable forwarding
-	     * from the old interface.
-	     */
-	    bt->name = ifp;		/* relocate address */
-	    printf("-- loop (%d) %6D to %s from %s (%s)\n",
-			bdg_loops, eh->ether_shost, ".",
-			ifp->if_xname, old->if_xname,
-			BDG_MUTED(old) ? "muted":"active");
-	    dropit = 1;
-	    if (!BDG_MUTED(old)) {
-		if (bdg_loops++ > 10)
-		    BDG_MUTE(old);
-	    }
-	}
-    }
-
-    /*
-     * now write the source address into the table
-     */
-    if (bt->name == NULL) {
-	DPRINTF(("%s: new addr %6D at %d for %s\n",
-	    __func__, eh->ether_shost, ".", index, ifp->if_xname));
-	ETHER_ADDR_COPY(bt->etheraddr, eh->ether_shost);
-	bt->name = ifp;
-    }
-    dst = bridge_dst_lookup(eh, BDG_CLUSTER(ifp));
-    BDG_UNLOCK();
-
-    /*
-     * bridge_dst_lookup can return the following values:
-     *   BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
-     * For muted interfaces, or when we detect a loop, the first 3 are
-     * changed in BDG_LOCAL (we still listen to incoming traffic),
-     * and others to BDG_DROP (no use for the local host).
-     * Also, for incoming packets, ifp is changed to BDG_DROP if ifp == src.
-     * These changes are not necessary for outgoing packets from ether_output().
-     */
-    BDG_STAT(ifp, BDG_IN);
-    switch ((uintptr_t)dst) {
-    case (uintptr_t)BDG_BCAST:
-    case (uintptr_t)BDG_MCAST:
-    case (uintptr_t)BDG_LOCAL:
-    case (uintptr_t)BDG_UNKNOWN:
-    case (uintptr_t)BDG_DROP:
-	BDG_STAT(ifp, dst);
-	break;
-    default:
-	if (dst == ifp || dropit)
-	    BDG_STAT(ifp, BDG_DROP);
-	else
-	    BDG_STAT(ifp, BDG_FORWARD);
-	break;
-    }
-
-    if (dropit) {
-	if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
-	    dst = BDG_LOCAL;
-	else
-	    dst = BDG_DROP;
-    } else {
-	if (dst == ifp)
-	    dst = BDG_DROP;
-    }
-    DPRINTF(("%s: %6D ->%6D ty 0x%04x dst %s\n", __func__,
-	eh->ether_shost, ".",
-	eh->ether_dhost, ".",
-	ntohs(eh->ether_type),
-	(dst <= BDG_FORWARD) ? bdg_dst_names[(uintptr_t)dst] :
-		dst->if_xname));
-
-    switch ((uintptr_t)dst) {
-    case (uintptr_t)BDG_DROP:
-	m_freem(m);
-	return (NULL);
-
-    case (uintptr_t)BDG_LOCAL:
-	return (m);
-
-    case (uintptr_t)BDG_BCAST:
-    case (uintptr_t)BDG_MCAST:
-        m = bdg_forward(m, dst);
-#ifdef	DIAGNOSTIC
-	if (m == NULL)
-		if_printf(ifp, "bridge dropped %s packet\n",
-		     dst == BDG_BCAST ? "broadcast" : "multicast");
-#endif
-	return (m);
-
-    default:
-        m = bdg_forward(m, dst);
-	/*
-	 * But in some cases the bridge may return the
-	 * packet for us to free; sigh.
-	 */
-	if (m != NULL)
-		m_freem(m);
-
-    }
-
-    return (NULL);
-}
-
-/*
- * Return 1 if it's ok to send a packet out the specified interface.
- * The interface must be:
- *	used for bridging,
- *	not muted,
- *	not full,
- *	up and running,
- *	not the source interface, and
- *	belong to the same cluster as the 'real_dst'.
- */
-static __inline int
-bridge_ifok(struct ifnet *ifp, struct ifnet *src, struct ifnet *dst)
-{
-    return (BDG_USED(ifp)
-	&& !BDG_MUTED(ifp)
-	&& !_IF_QFULL(&ifp->if_snd)
-	&& (ifp->if_flags & IFF_UP)
-	&& (ifp->if_drv_flags & IFF_DRV_RUNNING)
-	&& ifp != src
-	&& BDG_SAMECLUSTER(ifp, dst));
-}
-
-/*
- * Forward a packet to dst -- which can be a single interface or
- * an entire cluster. The src port and muted interfaces are excluded.
- *
- * If src == NULL, the pkt comes from ether_output, and dst is the real
- * interface the packet is originally sent to. In this case, we must forward
- * it to the whole cluster.
- * We never call bdg_forward from ether_output on interfaces which are
- * not part of a cluster.
- *
- * If possible (i.e. we can determine that the caller does not need
- * a copy), the packet is consumed here, and bdg_forward returns NULL.
- * Otherwise, a pointer to a copy of the packet is returned.
- */
-static struct mbuf *
-bdg_forward(struct mbuf *m0, struct ifnet *dst)
-{
-#define	EH_RESTORE(_m) do {						   \
-    M_PREPEND((_m), ETHER_HDR_LEN, M_DONTWAIT);			   	   \
-    if ((_m) == NULL) {							   \
-	bdg_dropped++;							   \
-	return NULL;							   \
-    }									   \
-    if (eh != mtod((_m), struct ether_header *))			   \
-	bcopy(&save_eh, mtod((_m), struct ether_header *), ETHER_HDR_LEN); \
-    else								   \
-	bdg_predict++;							   \
-} while (0);
-    struct ether_header *eh;
-    struct ifnet *src;
-    struct ifnet *ifp, *last;
-    int shared = bdg_copy;		/* someone else is using the mbuf */
-    int error;
-    struct ifnet *real_dst = dst;	/* real dst from ether_output */
-    struct ip_fw_args args;
-    struct ether_header save_eh;
-    struct mbuf *m;
-
-    DDB(quad_t ticks; ticks = rdtsc();)
-
-    args.rule = ip_dn_claim_rule(m0);
-    if (args.rule)
-	shared = 0;			/* For sure this is our own mbuf. */
-    else
-	bdg_thru++;			/* count 1st time through bdg_forward */
-
-    /*
-     * The packet arrives with the Ethernet header at the front.
-     */
-    eh = mtod(m0, struct ether_header *);
-
-    src = m0->m_pkthdr.rcvif;
-    if (src == NULL) {			/* packet from ether_output */
-	BDG_LOCK();
-	dst = bridge_dst_lookup(eh, BDG_CLUSTER(real_dst));
-	BDG_UNLOCK();
-    }
-
-    if (dst == BDG_DROP) {		/* this should not happen */
-	printf("xx bdg_forward for BDG_DROP\n");
-	m_freem(m0);
-	bdg_dropped++;
-	return NULL;
-    }
-    if (dst == BDG_LOCAL) {		/* this should not happen as well */
-	printf("xx ouch, bdg_forward for local pkt\n");
-	return m0;
-    }
-    if (dst == BDG_BCAST || dst == BDG_MCAST) {
-	 /* need a copy for the local stack */
-	 shared = 1;
-    }
-
-    /*
-     * Do filtering in a very similar way to what is done in ip_output.
-     * Only if firewall is loaded, enabled, and the packet is not
-     * from ether_output() (src==NULL, or we would filter it twice).
-     * Additional restrictions may apply e.g. non-IP, short packets,
-     * and pkts already gone through a pipe.
-     */
-    if (src != NULL && (
-	(inet_pfil_hook.ph_busy_count >= 0 && bdg_ipf != 0) ||
-	(IPFW_LOADED && bdg_ipfw != 0))) {
-
-	int i;
-
-	if (args.rule != NULL && fw_one_pass)
-	    goto forward; /* packet already partially processed */
-	/*
-	 * i need some amt of data to be contiguous, and in case others need
-	 * the packet (shared==1) also better be in the first mbuf.
-	 */
-	i = min(m0->m_pkthdr.len, max_protohdr) ;
-	if (shared || m0->m_len < i) {
-	    m0 = m_pullup(m0, i);
-	    if (m0 == NULL) {
-		printf("%s: m_pullup failed\n", __func__);	/* XXXDPRINTF*/
-		bdg_dropped++;
-		return NULL;
-	    }
-	    eh = mtod(m0, struct ether_header *);
-	}
-
-	/*
-	 * Processing below expects the Ethernet header is stripped.
-	 * Furthermore, the mbuf chain might be replaced at various
-	 * places.  To deal with this we copy the header to a temporary
-	 * location, strip the header, and restore it as needed.
-	 */
-	bcopy(eh, &save_eh, ETHER_HDR_LEN);	/* local copy for restore */
-	m_adj(m0, ETHER_HDR_LEN);		/* temporarily strip header */
-
-	/*
-	 * Check that the IP header is aligned before passing up to the packet
-	 * filter.
-	 */
-	if (ntohs(save_eh.ether_type) == ETHERTYPE_IP && 
-	    IP_HDR_ALIGNED_P(mtod(m0, caddr_t)) == 0) {
-		if ((m0 = m_copyup(m0, sizeof(struct ip),
-			(max_linkhdr + 3) & ~3)) == NULL) {
-			bdg_dropped++;
-			return NULL;
-		}
-	}
-
-	/*
-	 * NetBSD-style generic packet filter, pfil(9), hooks.
-	 * Enables ipf(8) in bridging.
-	 */
-	if (!IPFW_LOADED) { /* XXX: Prevent ipfw from being run twice. */
-	if (inet_pfil_hook.ph_busy_count >= 0 &&
-	    m0->m_pkthdr.len >= sizeof(struct ip) &&
-	    ntohs(save_eh.ether_type) == ETHERTYPE_IP) {
-	    /*
-	     * before calling the firewall, swap fields the same as IP does.
-	     * here we assume the pkt is an IP one and the header is contiguous
-	     */
-	    struct ip *ip = mtod(m0, struct ip *);
-
-	    ip->ip_len = ntohs(ip->ip_len);
-	    ip->ip_off = ntohs(ip->ip_off);
-
-	    if (pfil_run_hooks(&inet_pfil_hook, &m0, src, PFIL_IN, NULL) != 0) {
-		/* NB: hook should consume packet */
-		return NULL;
-	    }
-	    if (m0 == NULL)			/* consumed by filter */
-		return m0;
-	    /*
-	     * If we get here, the firewall has passed the pkt, but the mbuf
-	     * pointer might have changed. Restore ip and the fields ntohs()'d.
-	     */
-	    ip = mtod(m0, struct ip *);
-	    ip->ip_len = htons(ip->ip_len);
-	    ip->ip_off = htons(ip->ip_off);
-	}
-	} /* XXX: Prevent ipfw from being run twice. */
-
-	/*
-	 * Prepare arguments and call the firewall.
-	 */
-	if (!IPFW_LOADED || bdg_ipfw == 0) {
-	    EH_RESTORE(m0);	/* restore Ethernet header */
-	    goto forward;	/* not using ipfw, accept the packet */
-	}
-
-	/*
-	 * XXX The following code is very similar to the one in
-	 * if_ethersubr.c:ether_ipfw_chk()
-	 */
-
-	args.m = m0;		/* the packet we are looking at		*/
-	args.oif = NULL;	/* this is an input packet		*/
-	args.next_hop = NULL;	/* we do not support forward yet	*/
-	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
-	i = ip_fw_chk_ptr(&args);
-	m0 = args.m;		/* in case the firewall used the mbuf	*/
-
-	if (m0 != NULL)
-		EH_RESTORE(m0);	/* restore Ethernet header */
-
-	if (i == IP_FW_DENY) /* drop */
-	    return m0;
-
-	KASSERT(m0 != NULL, ("bdg_forward: m0 is NULL"));
-
-	if (i == 0) /* a PASS rule.  */
-	    goto forward;
-	if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
-	    /*
-	     * Pass the pkt to dummynet, which consumes it.
-	     * If shared, make a copy and keep the original.
-	     */
-	    if (shared) {
-		m = m_copypacket(m0, M_DONTWAIT);
-		if (m == NULL) {	/* copy failed, give up */
-		    bdg_dropped++;
-		    return NULL;
-		}
-	    } else {
-		m = m0 ; /* pass the original to dummynet */
-		m0 = NULL ; /* and nothing back to the caller */
-	    }
-
-	    args.oif = real_dst;
-	    ip_dn_io_ptr(m, DN_TO_BDG_FWD, &args);
-	    return m0;
-	}
-	/*
-	 * XXX at some point, add support for divert/forward actions.
-	 * If none of the above matches, we have to drop the packet.
-	 */
-	bdg_ipfw_drops++;
-	return m0;
-    }
-forward:
-    /*
-     * Again, bring up the headers in case of shared bufs to avoid
-     * corruptions in the future.
-     */
-    if (shared) {
-	int i = min(m0->m_pkthdr.len, max_protohdr);
-
-	m0 = m_pullup(m0, i);
-	if (m0 == NULL) {
-	    bdg_dropped++;
-	    return NULL;
-	}
-	/* NB: eh is not used below; no need to recalculate it */
-    }
-
-    /*
-     * now real_dst is used to determine the cluster where to forward.
-     * For packets coming from ether_input, this is the one of the 'src'
-     * interface, whereas for locally generated packets (src==NULL) it
-     * is the cluster of the original destination interface, which
-     * was already saved into real_dst.
-     */
-    if (src != NULL)
-	real_dst = src;
-
-    last = NULL;
-    if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
-	/*
-	 * Scan all ports and send copies to all but the last.
-	 */
-	IFNET_RLOCK();		/* XXX replace with generation # */
-	TAILQ_FOREACH(ifp, &ifnet, if_link) {
-	    if (bridge_ifok(ifp, src, real_dst)) {
-		if (last) {
-		    /*
-		     * At this point we know two interfaces need a copy
-		     * of the packet (last + ifp) so we must create a
-		     * copy to handoff to last.
-		     */
-		    m = m_copypacket(m0, M_DONTWAIT);
-		    if (m == NULL) {
-			IFNET_RUNLOCK();
-			printf("%s: , m_copypacket failed!\n", __func__);
-			bdg_dropped++;
-			return m0;	/* the original is still there... */
-		    }
-		    IFQ_HANDOFF(last, m, error);
-		    if (!error)
-			BDG_STAT(last, BDG_OUT);
-		    else
-			bdg_dropped++;
-		}
-		last = ifp;
-	    }
-	}
-	IFNET_RUNLOCK();
-    } else {
-	if (bridge_ifok(dst, src, real_dst))
-	    last = dst;
-    }
-    if (last) {
-	if (shared) {			/* need to copy */
-	    m = m_copypacket(m0, M_DONTWAIT);
-	    if (m == NULL) {
-		printf("%s: sorry, m_copypacket failed!\n", __func__);
-		bdg_dropped++ ;
-		return m0;		/* the original is still there... */
-	    }
-	} else {			/* consume original */
-	    m = m0, m0 = NULL;
-	}
-	IFQ_HANDOFF(last, m, error);
-	if (!error)
-	    BDG_STAT(last, BDG_OUT);
-	else
-	    bdg_dropped++;
-    }
-
-    DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;
-	if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
-    return m0;
-#undef EH_RESTORE
-}
-
-/*
- * initialization of bridge code.
- */
-static int
-bdginit(void)
-{
-    if (bootverbose)
-	    printf("BRIDGE %s loaded\n", bridge_version);
-
-    ifp2sc = malloc(BDG_MAX_PORTS * sizeof(struct bdg_softc),
-		M_IFADDR, M_WAITOK | M_ZERO );
-    if (ifp2sc == NULL)
-	return ENOMEM;
-
-    BDG_LOCK_INIT();
-
-    n_clusters = 0;
-    clusters = NULL;
-    do_bridge = 0;
-
-    bzero(&bdg_stats, sizeof(bdg_stats));
-
-    bridge_in_ptr = bridge_in;
-    bdg_forward_ptr = bdg_forward;
-    bdgtakeifaces_ptr = reconfigure_bridge;
-
-    bdgtakeifaces_ptr();		/* XXX does this do anything? */
-
-    callout_init(&bdg_callout, NET_CALLOUT_MPSAFE);
-    bdg_timeout(0);
-    return 0 ;
-}
-
-static void
-bdgdestroy(void)
-{
-    bridge_in_ptr = NULL;
-    bdg_forward_ptr = NULL;
-    bdgtakeifaces_ptr = NULL;
-
-    callout_stop(&bdg_callout);
-    BDG_LOCK();
-    bridge_off();
-
-    if (ifp2sc) {
-	free(ifp2sc, M_IFADDR);
-	ifp2sc = NULL;
-    }
-    BDG_LOCK_DESTROY();
-}
-
-/*
- * initialization code, both for static and dynamic loading.
- */
-static int
-bridge_modevent(module_t mod, int type, void *unused)
-{
-	int err;
-
-	switch (type) {
-	case MOD_LOAD:
-		if (BDG_LOADED)
-			err = EEXIST;
-		else
-			err = bdginit();
-		break;
-	case MOD_UNLOAD:
-		do_bridge = 0;
-		bdgdestroy();
-		err = 0;
-		break;
-	default:
-		err = EINVAL;
-		break;
-	}
-	return err;
-}
-
-static moduledata_t bridge_mod = {
-	"bridge",
-	bridge_modevent,
-	0
-};
-
-DECLARE_MODULE(bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_VERSION(bridge, 1);
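
The removed bridge.c kept its forwarding table in an 8192-bucket hash keyed on the last four bytes of the source MAC address (the HASH_FN macro above). For reference, a standalone sketch of that bucket computation, written with memcpy() instead of the unaligned 16-bit loads the old macro relied on:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>		/* ntohs() */

#define	HASH_SIZE	8192	/* power of 2, as in the removed code */

/* Same arithmetic as the removed HASH_FN(): bytes 2-3 XOR bytes 4-5. */
static unsigned int
bdg_hash(const uint8_t mac[6])
{
	uint16_t w1, w2;

	memcpy(&w1, mac + 2, sizeof(w1));
	memcpy(&w2, mac + 4, sizeof(w2));
	return (ntohs(w1 ^ w2) & (HASH_SIZE - 1));
}

int
main(void)
{
	uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	printf("MAC hashes to bucket %u of %d\n", bdg_hash(mac), HASH_SIZE);
	return (0);
}
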
Index: if_arp.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_arp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_arp.h -L sys/net/if_arp.h -u -r1.1.1.1 -r1.2
--- sys/net/if_arp.h
+++ sys/net/if_arp.h
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_arp.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_arp.h,v 1.22 2005/06/10 16:49:18 brooks Exp $
+ * $FreeBSD: src/sys/net/if_arp.h,v 1.24 2005/11/11 16:04:48 ru Exp $
  */
 
 #ifndef _NET_IF_ARP_H_
@@ -103,11 +103,9 @@
  */
 struct	arpcom {
 	struct 	ifnet *ac_ifp;		/* network-visible interface */
-	u_char	_ac_enaddr[6];		/* ethernet hardware address */
 	void	*ac_netgraph;		/* ng_ether(4) netgraph node info */
 };
 #define IFP2AC(ifp) ((struct arpcom *)(ifp->if_l2com))
-#define IFP2ENADDR(ifp) (IFP2AC(ifp)->_ac_enaddr)
 #define AC2IFP(ac) ((ac)->ac_ifp)
 
 #endif
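
With _ac_enaddr and the IFP2ENADDR() macro gone from struct arpcom, Ethernet drivers fetch the hardware address from the ifnet's link-level ifaddr instead. A minimal before/after sketch, assuming the IF_LLADDR() accessor from <net/if_var.h> (the usual replacement in this FreeBSD generation):

#include <sys/param.h>
#include <sys/systm.h>		/* bcopy() */
#include <net/if.h>
#include <net/if_var.h>		/* IF_LLADDR() -- assumed accessor */
#include <net/ethernet.h>

static void
copy_hwaddr(struct ifnet *ifp, u_char dst[ETHER_ADDR_LEN])
{
	/* Before: bcopy(IFP2ENADDR(ifp), dst, ETHER_ADDR_LEN); */
	bcopy(IF_LLADDR(ifp), dst, ETHER_ADDR_LEN);
}
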
--- /dev/null
+++ sys/net/bpf_jitter.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (c) 2005 Jung-uk Kim <jkim at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/net/bpf_jitter.h,v 1.2 2005/12/07 21:30:47 jkim Exp $
+ */
+
+#ifndef _NET_BPF_JITTER_H_
+#define _NET_BPF_JITTER_H_
+
+MALLOC_DECLARE(M_BPFJIT);
+
+extern int bpf_jitter_enable;
+
+/*
+ * Prototype of a filtering function created by the jitter.
+ *
+ * The syntax and the meaning of the parameters are analogous to those of
+ * bpf_filter(). Notice that the filter is not among the parameters because
+ * it is hardwired in the function.
+ */
+typedef u_int (*bpf_filter_func)(u_char *, u_int, u_int);
+
+/* Structure describing a native filtering program created by the jitter. */
+typedef struct bpf_jit_filter {
+	/* The native filtering binary, in the form of a bpf_filter_func. */
+	bpf_filter_func	func;
+
+	int		*mem;
+} bpf_jit_filter;
+
+/*
+ * BPF jitter, builds a machine function from a BPF program.
+ *
+ * param fp	The BPF pseudo-assembly filter that will be translated
+ *		into native code.
+ * param nins	Number of instructions of the input filter.
+ * return	The bpf_jit_filter structure containing the native filtering
+ *		binary.
+ *
+ * bpf_jitter allocates the buffers for the new native filter and
+ * then translates the program pointed to by fp by calling bpf_jit_compile().
+ */
+bpf_jit_filter	*bpf_jitter(struct bpf_insn *fp, int nins);
+
+/*
+ * Deletes a filtering function that was previously created by bpf_jitter().
+ *
+ * param filter	The filter to destroy.
+ *
+ * This function frees the various buffers (code, memory, etc.) associated
+ * with a filtering function.
+ */
+void		bpf_destroy_jit_filter(bpf_jit_filter *filter);
+
+#endif	/* _NET_BPF_JITTER_H_ */
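
A hedged sketch of how a kernel consumer would drive the API documented in the new header: compile the pseudo-assembly program once, call the resulting native function per packet, and free it when done. The program, packet buffer, and lengths below are placeholders supplied by the caller:

#include <sys/param.h>
#include <sys/malloc.h>
#include <net/bpf.h>
#include <net/bpf_jitter.h>

/*
 * Sketch: JIT-compile prog (nins instructions), run it once over a
 * packet, and tear it down.  Returns the snap length; 0 means the
 * packet was rejected or the jitter could not build a native filter.
 */
static u_int
run_jitted_filter(struct bpf_insn *prog, int nins,
    u_char *pkt, u_int wirelen, u_int buflen)
{
	bpf_jit_filter *jf;
	u_int slen;

	jf = bpf_jitter(prog, nins);
	if (jf == NULL)
		return (0);	/* caller should fall back to bpf_filter() */

	/* The filter program is hardwired into the generated code. */
	slen = jf->func(pkt, wirelen, buflen);

	bpf_destroy_jit_filter(jf);
	return (slen);
}
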
Index: if_spppsubr.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_spppsubr.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/if_spppsubr.c -L sys/net/if_spppsubr.c -u -r1.2 -r1.3
--- sys/net/if_spppsubr.c
+++ sys/net/if_spppsubr.c
@@ -18,7 +18,7 @@
  *
  * From: Version 2.4, Thu Apr 30 17:17:21 MSD 1997
  *
- * $FreeBSD: src/sys/net/if_spppsubr.c,v 1.119.2.2 2005/11/04 20:26:14 ume Exp $
+ * $FreeBSD: src/sys/net/if_spppsubr.c,v 1.127 2007/06/10 04:53:13 mjacob Exp $
  */
 
 #include <sys/param.h>
@@ -793,7 +793,7 @@
 		 * packets.  This is used by some subsystems to detect
 		 * idle lines.
 		 */
-		sp->pp_last_recv = time_second;
+		sp->pp_last_recv = time_uptime;
 }
 
 static void
@@ -1066,7 +1066,7 @@
 	 * network-layer traffic; control-layer traffic is handled
 	 * by sppp_cp_send().
 	 */
-	sp->pp_last_sent = time_second;
+	sp->pp_last_sent = time_uptime;
 	return (0);
 }
 
@@ -1104,7 +1104,7 @@
 		mtx_init(&sp->pp_cpq.ifq_mtx, "sppp_cpq", NULL, MTX_DEF);
 	if(!mtx_initialized(&sp->pp_fastq.ifq_mtx))
 		mtx_init(&sp->pp_fastq.ifq_mtx, "sppp_fastq", NULL, MTX_DEF);
-	sp->pp_last_recv = sp->pp_last_sent = time_second;
+	sp->pp_last_recv = sp->pp_last_sent = time_uptime;
 	sp->confflags = 0;
 #ifdef INET
 	sp->confflags |= CONF_ENABLE_VJ;
@@ -3502,6 +3502,7 @@
 	int ifidcount;
 	int type;
 	int collision, nohisaddr;
+	char ip6buf[INET6_ADDRSTRLEN];
 
 	len -= 4;
 	origlen = len;
@@ -3595,8 +3596,8 @@
 
 				if (debug) {
 					log(-1, " %s [%s]",
-					       ip6_sprintf(&desiredaddr),
-					       sppp_cp_type_name(type));
+					    ip6_sprintf(ip6buf, &desiredaddr),
+					    sppp_cp_type_name(type));
 				}
 				continue;
 			}
@@ -3617,8 +3618,9 @@
 				bcopy(&suggestaddr.s6_addr[8], &p[2], 8);
 			}
 			if (debug)
-				log(-1, " %s [%s]", ip6_sprintf(&desiredaddr),
-				       sppp_cp_type_name(type));
+				log(-1, " %s [%s]",
+				    ip6_sprintf(ip6buf, &desiredaddr),
+				    sppp_cp_type_name(type));
 			break;
 		}
 		/* Add the option to nak'ed list. */
@@ -3639,7 +3641,8 @@
 
 		if (debug) {
 			log(-1, " send %s suggest %s\n",
-			       sppp_cp_type_name(type), ip6_sprintf(&suggestaddr));
+			    sppp_cp_type_name(type),
+			    ip6_sprintf(ip6buf, &suggestaddr));
 		}
 		sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, rlen, buf);
 	}
@@ -3706,6 +3709,7 @@
 	struct ifnet *ifp = SP2IFP(sp);
 	int debug = ifp->if_flags & IFF_DEBUG;
 	struct in6_addr suggestaddr;
+	char ip6buf[INET6_ADDRSTRLEN];
 
 	len -= 4;
 	buf = malloc (len, M_TEMP, M_NOWAIT);
@@ -3738,7 +3742,7 @@
 			sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID);
 			if (debug)
 				log(-1, " [suggestaddr %s]",
-				       ip6_sprintf(&suggestaddr));
+				       ip6_sprintf(ip6buf, &suggestaddr));
 #ifdef IPV6CP_MYIFID_DYN
 			/*
 			 * When doing dynamic address assignment,
@@ -4967,7 +4971,7 @@
 	if (ifa && si)
 	{
 		int error;
-#if __NetBSD_Version__ >= 103080000
+#if defined(__NetBSD__) && __NetBSD_Version__ >= 103080000
 		struct sockaddr_in new_sin = *si;
 
 		new_sin.sin_addr.s_addr = htonl(src);
@@ -5023,16 +5027,15 @@
 	 * Pick the first link-local AF_INET6 address from the list,
 	 * aliases don't make any sense on a p2p link anyway.
 	 */
+	si = 0;
 #if defined(__FreeBSD__) && __FreeBSD__ >= 3
-	for (ifa = ifp->if_addrhead.tqh_first, si = 0;
-	     ifa;
-	     ifa = ifa->ifa_link.tqe_next)
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 #elif defined(__NetBSD__) || defined (__OpenBSD__)
-	for (ifa = ifp->if_addrlist.tqh_first, si = 0;
+	for (ifa = ifp->if_addrlist.tqh_first;
 	     ifa;
 	     ifa = ifa->ifa_list.tqe_next)
 #else
-	for (ifa = ifp->if_addrlist, si = 0;
+	for (ifa = ifp->if_addrlist;
 	     ifa;
 	     ifa = ifa->ifa_next)
 #endif
@@ -5089,9 +5092,7 @@
 
 	sin6 = NULL;
 #if defined(__FreeBSD__) && __FreeBSD__ >= 3
-	for (ifa = ifp->if_addrhead.tqh_first;
-	     ifa;
-	     ifa = ifa->ifa_link.tqe_next)
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 #elif defined(__NetBSD__) || defined (__OpenBSD__)
 	for (ifa = ifp->if_addrlist.tqh_first;
 	     ifa;
@@ -5175,7 +5176,7 @@
 	}
 
 	switch (subcmd) {
-	case (int)SPPPIOGDEFS:
+	case (u_long)SPPPIOGDEFS:
 		if (cmd != SIOCGIFGENERIC) {
 			rv = EINVAL;
 			break;
@@ -5210,7 +5211,7 @@
 			     sizeof(struct spppreq));
 		break;
 
-	case (int)SPPPIOSDEFS:
+	case (u_long)SPPPIOSDEFS:
 		if (cmd != SIOCSIFGENERIC) {
 			rv = EINVAL;
 			break;
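
The switch from time_second to time_uptime in the hunks above matters because time_second is wall-clock time and jumps whenever the clock is stepped, while time_uptime is monotonic; idle-line detection keyed on pp_last_recv/pp_last_sent can therefore no longer be confused by clock adjustments. A minimal sketch of the kind of check this supports follows; the helper name and threshold are illustrative only, and the field types are assumed to be time_t as in net/if_sppp.h.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <net/if.h>
#include <net/if_sppp.h>

/*
 * Illustrative only: has this sppp link been idle in both directions
 * for more than 'limit' seconds?  pp_last_recv/pp_last_sent are now
 * kept on the time_uptime base, so the comparison is immune to clock
 * steps.
 */
static int
sppp_is_idle(const struct sppp *sp, time_t limit)
{

	return (time_uptime - sp->pp_last_recv > limit &&
	    time_uptime - sp->pp_last_sent > limit);
}
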
Index: pfil.c
===================================================================
RCS file: /home/cvs/src/sys/net/pfil.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/pfil.c -L sys/net/pfil.c -u -r1.1.1.1 -r1.2
--- sys/net/pfil.c
+++ sys/net/pfil.c
@@ -1,4 +1,4 @@
-/*	$FreeBSD: src/sys/net/pfil.c,v 1.13 2005/05/23 17:07:16 mlaier Exp $ */
+/*	$FreeBSD: src/sys/net/pfil.c,v 1.14 2006/02/02 03:13:15 csjp Exp $ */
 /*	$NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $	*/
 
 /*-
@@ -32,7 +32,9 @@
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/systm.h>
@@ -57,57 +59,6 @@
 LIST_HEAD(, pfil_head) pfil_head_list =
     LIST_HEAD_INITIALIZER(&pfil_head_list);
 
-static __inline void
-PFIL_RLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_busy_count++;
-	mtx_unlock(&ph->ph_mtx);
-}
-
-static __inline void
-PFIL_RUNLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_busy_count--;
-	if (ph->ph_busy_count == 0 && ph->ph_want_write)
-		cv_signal(&ph->ph_cv);
-	mtx_unlock(&ph->ph_mtx);
-}
-
-static __inline void
-PFIL_WLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_want_write = 1;
-	while (ph->ph_busy_count > 0)
-		cv_wait(&ph->ph_cv, &ph->ph_mtx);
-}
-
-static __inline int
-PFIL_TRY_WLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_want_write = 1;
-	if (ph->ph_busy_count > 0) {
-		ph->ph_want_write = 0;
-		mtx_unlock(&ph->ph_mtx);
-		return EBUSY;
-	}
-	return 0;
-}
-
-static __inline void
-PFIL_WUNLOCK(struct pfil_head *ph)
-{
-	ph->ph_want_write = 0;
-	cv_signal(&ph->ph_cv);
-	mtx_unlock(&ph->ph_mtx);
-}
-
-#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
-
 /*
  * pfil_run_hooks() runs the specified packet filter hooks.
  */
@@ -119,20 +70,8 @@
 	struct mbuf *m = *mp;
 	int rv = 0;
 
-	if (ph->ph_busy_count == -1)
-		return (0);
-	/*
-	 * Prevent packet filtering from starving the modification of
-	 * the packet filters. We would prefer a reader/writer locking
-	 * mechanism with guaranteed ordering, though.
-	 */
-	if (ph->ph_want_write) {
-		m_freem(*mp);
-		*mp = NULL;
-		return (ENOBUFS);
-	}
-
 	PFIL_RLOCK(ph);
+	KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
 	for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
 	     pfh = TAILQ_NEXT(pfh, pfil_link)) {
 		if (pfh->pfil_func != NULL) {
@@ -165,16 +104,9 @@
 		}
 	PFIL_LIST_UNLOCK();
 
-	if (mtx_initialized(&ph->ph_mtx)) {	/* should not happen */
-		KASSERT((0), ("%s: allready initialized!", __func__));
-		return EBUSY;
-	} else {
-		ph->ph_busy_count = -1;
-		ph->ph_want_write = 1;
-		mtx_init(&ph->ph_mtx, "pfil_head_mtx", NULL, MTX_DEF);
-		cv_init(&ph->ph_cv, "pfil_head_cv");
-		mtx_lock(&ph->ph_mtx);			/* XXX: race? */
-	}
+	rw_init(&ph->ph_mtx, "PFil hook read/write mutex");
+	PFIL_WLOCK(ph);
+	ph->ph_nhooks = 0;
 
 	TAILQ_INIT(&ph->ph_in);
 	TAILQ_INIT(&ph->ph_out);
@@ -182,9 +114,9 @@
 	PFIL_LIST_LOCK();
 	LIST_INSERT_HEAD(&pfil_head_list, ph, ph_list);
 	PFIL_LIST_UNLOCK();
-	
+
 	PFIL_WUNLOCK(ph);
-	
+
 	return (0);
 }
 
@@ -205,14 +137,13 @@
 	LIST_REMOVE(ph, ph_list);
 	PFIL_LIST_UNLOCK();
 
-	PFIL_WLOCK(ph);			/* XXX: may sleep (cv_wait)! */
+	PFIL_WLOCK(ph);
 	
 	TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext)
 		free(pfh, M_IFADDR);
 	TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext)
 		free(pfh, M_IFADDR);
-	cv_destroy(&ph->ph_cv);
-	mtx_destroy(&ph->ph_mtx);
+	rw_destroy(&ph->ph_mtx);
 	
 	return (0);
 }
@@ -269,13 +200,7 @@
 	}
 
 	/* Lock */
-	if (flags & PFIL_WAITOK)
-		PFIL_WLOCK(ph);
-	else {
-		err = PFIL_TRY_WLOCK(ph);
-		if (err)
-			goto error;
-	}
+	PFIL_WLOCK(ph);
 
 	/* Add */
 	if (flags & PFIL_IN) {
@@ -284,6 +209,7 @@
 		err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
 		if (err)
 			goto done;
+		ph->ph_nhooks++;
 	}
 	if (flags & PFIL_OUT) {
 		pfh2->pfil_func = func;
@@ -294,9 +220,9 @@
 				pfil_list_remove(&ph->ph_in, func, arg);
 			goto done;
 		}
+		ph->ph_nhooks++;
 	}
 
-	ph->ph_busy_count = 0;
 	PFIL_WUNLOCK(ph);
 
 	return 0;
@@ -320,22 +246,18 @@
 {
 	int err = 0;
 
-	if (flags & PFIL_WAITOK)
-		PFIL_WLOCK(ph);
-	else {
-		err = PFIL_TRY_WLOCK(ph);
-		if (err)
-			return err;
-	}
+	PFIL_WLOCK(ph);
 
-	if (flags & PFIL_IN)
+	if (flags & PFIL_IN) {
 		err = pfil_list_remove(&ph->ph_in, func, arg);
-	if ((err == 0) && (flags & PFIL_OUT))
+		if (err == 0)
+			ph->ph_nhooks--;
+	}
+	if ((err == 0) && (flags & PFIL_OUT)) {
 		err = pfil_list_remove(&ph->ph_out, func, arg);
-
-	if (TAILQ_EMPTY(&ph->ph_in) && TAILQ_EMPTY(&ph->ph_out))
-		ph->ph_busy_count = -1;
-
+		if (err == 0)
+			ph->ph_nhooks--;
+	}
 	PFIL_WUNLOCK(ph);
 	
 	return err;
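
The pfil.c rework above replaces the hand-rolled busy-count/condvar scheme with an rwlock and a ph_nhooks counter, and pfil_add_hook()/pfil_remove_hook() now always take the write lock, so the PFIL_WAITOK flag appears to be effectively a no-op here (passing it remains harmless). As a hedged illustration of the consumer side, a trivial hook might be registered roughly as below; the function and variable names are invented, and the callback prototype (including the trailing struct inpcb *) follows the pfil.h of this era and should be checked against the installed header.

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/pfil.h>
#include <netinet/in.h>

static int demo_pkts_seen;

/* Illustrative hook: count packets on the AF_INET head and accept them all. */
static int
demo_pfil_hook(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir,
    struct inpcb *inp)
{

	demo_pkts_seen++;
	return (0);		/* 0 == pass; leave *mp untouched */
}

static void
demo_pfil_attach(void)
{
	struct pfil_head *ph;

	ph = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (ph != NULL)
		(void)pfil_add_hook(demo_pfil_hook, NULL,
		    PFIL_IN | PFIL_OUT | PFIL_WAITOK, ph);
}

static void
demo_pfil_detach(void)
{
	struct pfil_head *ph;

	ph = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (ph != NULL)
		(void)pfil_remove_hook(demo_pfil_hook, NULL,
		    PFIL_IN | PFIL_OUT | PFIL_WAITOK, ph);
}
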
Index: if_gif.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_gif.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_gif.h -L sys/net/if_gif.h -u -r1.1.1.2 -r1.2
--- sys/net/if_gif.h
+++ sys/net/if_gif.h
@@ -1,4 +1,4 @@
-/*	$FreeBSD: src/sys/net/if_gif.h,v 1.17.2.2 2006/01/31 15:56:46 glebius Exp $	*/
+/*	$FreeBSD: src/sys/net/if_gif.h,v 1.19 2006/01/30 08:39:09 glebius Exp $	*/
 /*	$KAME: if_gif.h,v 1.17 2000/09/11 11:36:41 sumikawa Exp $	*/
 
 /*-
Index: if_stf.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_stf.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_stf.c -L sys/net/if_stf.c -u -r1.1.1.1 -r1.2
--- sys/net/if_stf.c
+++ sys/net/if_stf.c
@@ -1,4 +1,4 @@
-/*	$FreeBSD: src/sys/net/if_stf.c,v 1.50.2.1 2005/11/16 10:31:21 ru Exp $	*/
+/*	$FreeBSD: src/sys/net/if_stf.c,v 1.60 2007/09/23 17:50:17 csjp Exp $	*/
 /*	$KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $	*/
 
 /*-
@@ -82,7 +82,6 @@
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
-#include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
@@ -115,10 +114,10 @@
 
 #include <machine/stdarg.h>
 
-#include <net/net_osdep.h>
-
 #include <net/bpf.h>
 
+#include <security/mac/mac_framework.h>
+
 #define STFNAME		"stf"
 #define STFUNIT		0
 
@@ -138,19 +137,14 @@
 	} __sc_ro46;
 #define sc_ro	__sc_ro46.__sc_ro4
 	const struct encaptab *encap_cookie;
-	LIST_ENTRY(stf_softc) sc_list;	/* all stf's are linked */
 };
 #define STF2IFP(sc)	((sc)->sc_ifp)
 
 /*
- * All mutable global variables in if_stf.c are protected by stf_mtx.
  * XXXRW: Note that mutable fields in the softc are not currently locked:
  * in particular, sc_ro needs to be protected from concurrent entrance
  * of stf_output().
  */
-static struct mtx stf_mtx;
-static LIST_HEAD(, stf_softc) stf_softc_list;
-
 static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
 static const int ip_stf_ttl = 40;
 
@@ -182,7 +176,7 @@
 static int stf_ioctl(struct ifnet *, u_long, caddr_t);
 
 static int stf_clone_match(struct if_clone *, const char *);
-static int stf_clone_create(struct if_clone *, char *, size_t);
+static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int stf_clone_destroy(struct if_clone *, struct ifnet *);
 struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0,
     NULL, stf_clone_match, stf_clone_create, stf_clone_destroy);
@@ -201,7 +195,7 @@
 }
 
 static int
-stf_clone_create(struct if_clone *ifc, char *name, size_t len)
+stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	int err, unit;
 	struct stf_softc *sc;
@@ -249,36 +243,22 @@
 	ifp->if_snd.ifq_maxlen = IFQ_MAXLEN;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
-	mtx_lock(&stf_mtx);
-	LIST_INSERT_HEAD(&stf_softc_list, sc, sc_list);
-	mtx_unlock(&stf_mtx);
 	return (0);
 }
 
-static void
-stf_destroy(struct stf_softc *sc)
+static int
+stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
 {
+	struct stf_softc *sc = ifp->if_softc;
 	int err;
 
 	err = encap_detach(sc->encap_cookie);
 	KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
-	bpfdetach(STF2IFP(sc));
-	if_detach(STF2IFP(sc));
-	if_free(STF2IFP(sc));
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
 
 	free(sc, M_STF);
-}
-
-static int
-stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
-{
-	struct stf_softc *sc = ifp->if_softc;
-
-	mtx_lock(&stf_mtx);
-	LIST_REMOVE(sc, sc_list);
-	mtx_unlock(&stf_mtx);
-
-	stf_destroy(sc);
 	ifc_free_unit(ifc, STFUNIT);
 
 	return (0);
@@ -290,27 +270,13 @@
 	int type;
 	void *data;
 {
-	struct stf_softc *sc;
 
 	switch (type) {
 	case MOD_LOAD:
-		mtx_init(&stf_mtx, "stf_mtx", NULL, MTX_DEF);
-		LIST_INIT(&stf_softc_list);
 		if_clone_attach(&stf_cloner);
-
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(&stf_cloner);
-
-		mtx_lock(&stf_mtx);
-		while ((sc = LIST_FIRST(&stf_softc_list)) != NULL) {
-			LIST_REMOVE(sc, sc_list);
-			mtx_unlock(&stf_mtx);
-			stf_destroy(sc);
-			mtx_lock(&stf_mtx);
-		}
-		mtx_unlock(&stf_mtx);
-		mtx_destroy(&stf_mtx);
 		break;
 	default:
 		return (EOPNOTSUPP);
@@ -400,12 +366,7 @@
 	struct sockaddr_in6 *sin6;
 	struct in_addr in;
 
-	for (ia = TAILQ_FIRST(&ifp->if_addrlist);
-	     ia;
-	     ia = TAILQ_NEXT(ia, ifa_list))
-	{
-		if (ia->ifa_addr == NULL)
-			continue;
+	TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) {
 		if (ia->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
@@ -509,7 +470,7 @@
 	}
 	bcopy(ptr, &in4, sizeof(in4));
 
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
@@ -646,10 +607,10 @@
 			    (u_int32_t)ntohl(sin.sin_addr.s_addr));
 #endif
 			if (rt)
-				rtfree(rt);
+				RTFREE_LOCKED(rt);
 			return -1;
 		}
-		rtfree(rt);
+		RTFREE_LOCKED(rt);
 	}
 
 	return 0;
@@ -756,7 +717,7 @@
 
 	m->m_pkthdr.rcvif = ifp;
 	
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
Index: if_media.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_media.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/if_media.c -L sys/net/if_media.c -u -r1.2 -r1.3
--- sys/net/if_media.c
+++ sys/net/if_media.c
@@ -1,5 +1,5 @@
 /*	$NetBSD: if_media.c,v 1.1 1997/03/17 02:55:15 thorpej Exp $	*/
-/* $FreeBSD: src/sys/net/if_media.c,v 1.21 2005/01/07 01:45:34 imp Exp $ */
+/* $FreeBSD: src/sys/net/if_media.c,v 1.23 2006/02/14 12:10:03 glebius Exp $ */
 
 /*-
  * Copyright (c) 1997
@@ -51,6 +51,8 @@
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
@@ -67,6 +69,8 @@
 
 #ifdef IFMEDIA_DEBUG
 int	ifmedia_debug = 0;
+SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug,
+	    0, "if_media debugging msgs");
 static	void ifmedia_printword(int);
 #endif
 
@@ -382,28 +386,27 @@
 }
 
 /*
-  	  * Compute the interface `baudrate' from the media, for the interface
-  	  * metrics (used by routing daemons).
-  	  */
-  	 static const struct ifmedia_baudrate ifmedia_baudrate_descriptions[] =
-  	     IFM_BAUDRATE_DESCRIPTIONS;
-  	 
-  	 uint64_t
-  	 ifmedia_baudrate(int mword)
-  	 {
-  	         int i;
-  	 
-  	         for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) {
-  	                 if ((mword & (IFM_NMASK|IFM_TMASK)) ==
-  	                     ifmedia_baudrate_descriptions[i].ifmb_word)
-  	                         return (ifmedia_baudrate_descriptions[i].ifmb_baudrate);
-  	         }
-  	 
-  	         /* Not known. */
-  	         return (0);
-  	 }
-  	 
+ * Compute the interface `baudrate' from the media, for the interface
+ * metrics (used by routing daemons).
+ */
+static const struct ifmedia_baudrate ifmedia_baudrate_descriptions[] =   
+    IFM_BAUDRATE_DESCRIPTIONS;
+
+uint64_t
+ifmedia_baudrate(int mword)
+{
+	int i;
 
+	for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) {
+		if ((mword & (IFM_NMASK|IFM_TMASK)) ==
+		    ifmedia_baudrate_descriptions[i].ifmb_word)
+			return (ifmedia_baudrate_descriptions[i].ifmb_baudrate);
+	}
+
+	/* Not known. */
+	return (0);
+}
+ 
 #ifdef IFMEDIA_DEBUG
 struct ifmedia_description ifm_type_descriptions[] =
     IFM_TYPE_DESCRIPTIONS;
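
The reformatted ifmedia_baudrate() above maps a media word onto a nominal bit rate for consumers such as routing daemons. As a hedged illustration of how a driver might use it once its active media is known, consider the sketch below; the function name is invented and the media word shown is only an example.

#include <sys/param.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_media.h>

/*
 * Illustrative: refresh the interface's advertised bit rate from the
 * media word that just became active, e.g. after autonegotiation.
 */
static void
demo_update_baudrate(struct ifnet *ifp, int active_media)
{

	/* e.g. active_media == (IFM_ETHER | IFM_100_TX | IFM_FDX) */
	ifp->if_baudrate = ifmedia_baudrate(active_media);
}
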
Index: if_fddisubr.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_fddisubr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_fddisubr.c -L sys/net/if_fddisubr.c -u -r1.1.1.1 -r1.2
--- sys/net/if_fddisubr.c
+++ sys/net/if_fddisubr.c
@@ -33,7 +33,7 @@
  * SUCH DAMAGE.
  *
  *	from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp
- * $FreeBSD: src/sys/net/if_fddisubr.c,v 1.98.2.2 2005/08/25 05:01:20 rwatson Exp $
+ * $FreeBSD: src/sys/net/if_fddisubr.c,v 1.104 2006/10/22 11:52:15 rwatson Exp $
  */
 
 #include "opt_atalk.h"
@@ -45,7 +45,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
@@ -89,6 +88,8 @@
 extern u_char	aarp_org_code[ 3 ];
 #endif /* NETATALK */
 
+#include <security/mac/mac_framework.h>
+
 static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -309,7 +310,7 @@
 	if (hdrcmplt)
 		bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN);
 	else
-		bcopy(IFP2ENADDR(ifp), (caddr_t)fh->fddi_shost,
+		bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost,
 			FDDI_ADDR_LEN);
 
 	/*
@@ -420,7 +421,7 @@
 	 * is in promiscuous mode.
 	 */
 	if ((ifp->if_flags & IFF_PROMISC) && ((fh->fddi_dhost[0] & 1) == 0) &&
-	    (bcmp(IFP2ENADDR(ifp), (caddr_t)fh->fddi_dhost,
+	    (bcmp(IF_LLADDR(ifp), (caddr_t)fh->fddi_dhost,
 	     FDDI_ADDR_LEN) != 0))
 		goto dropanyway;
 
@@ -495,7 +496,7 @@
 		switch (type) {
 #ifdef INET
 		case ETHERTYPE_IP:
-			if (ip_fastforward(m))
+			if ((m = ip_fastforward(m)) == NULL)
 				return;
 			isr = NETISR_IP;
 			break;
@@ -556,8 +557,9 @@
  * Perform common duties while attaching to interface list
  */
 void
-fddi_ifattach(ifp, bpf)
+fddi_ifattach(ifp, lla, bpf)
 	struct ifnet *ifp;
+	const u_int8_t *lla;
 	int bpf;
 {
 	struct ifaddr *ifa;
@@ -578,16 +580,13 @@
 #ifdef IFF_NOTRAILERS
 	ifp->if_flags |= IFF_NOTRAILERS;
 #endif
-	ifa = ifaddr_byindex(ifp->if_index);
-	if (ifa == NULL) {
-		if_printf(ifp, "%s() no lladdr!\n", __func__);
-		return;
-	}
+	ifa = ifp->if_addr;
+	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_FDDI;
 	sdl->sdl_alen = ifp->if_addrlen;
-	bcopy(IFP2ENADDR(ifp), LLADDR(sdl), ifp->if_addrlen);
+	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	if (bpf)
 		bpfattach(ifp, DLT_FDDI, FDDI_HDR_LEN);
@@ -645,10 +644,10 @@
 
 				if (ipx_nullhost(*ina)) {
 					ina->x_host = *(union ipx_host *)
-							IFP2ENADDR(ifp);
+							IF_LLADDR(ifp);
 				} else {
 					bcopy((caddr_t) ina->x_host.c_host,
-					      (caddr_t) IFP2ENADDR(ifp),
+					      (caddr_t) IF_LLADDR(ifp),
 					      ETHER_ADDR_LEN);
 				}
 	
@@ -668,7 +667,7 @@
 			struct sockaddr *sa;
 
 			sa = (struct sockaddr *) & ifr->ifr_data;
-			bcopy(IFP2ENADDR(ifp),
+			bcopy(IF_LLADDR(ifp),
 			      (caddr_t) sa->sa_data, FDDI_ADDR_LEN);
 
 		}
--- /dev/null
+++ sys/net/bpf_jitter.c
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (c) 2005 Jung-uk Kim <jkim at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/net/bpf_jitter.c,v 1.3 2005/12/07 21:30:47 jkim Exp $");
+
+#include "opt_bpf.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/sysctl.h>
+
+#include <net/bpf.h>
+#include <net/bpf_jitter.h>
+
+MALLOC_DEFINE(M_BPFJIT, "BPF_JIT", "BPF JIT compiler");
+
+bpf_filter_func	bpf_jit_compile(struct bpf_insn *, u_int, int *);
+
+SYSCTL_NODE(_net, OID_AUTO, bpf_jitter, CTLFLAG_RW, 0, "BPF JIT compiler");
+int bpf_jitter_enable = 1;
+SYSCTL_INT(_net_bpf_jitter, OID_AUTO, enable, CTLFLAG_RW,
+    &bpf_jitter_enable, 0, "enable BPF JIT compiler");
+
+bpf_jit_filter *
+bpf_jitter(struct bpf_insn *fp, int nins)
+{
+	bpf_jit_filter *filter;
+
+	/* Allocate the filter structure */
+	filter = (struct bpf_jit_filter *)malloc(sizeof(struct bpf_jit_filter),
+	    M_BPFJIT, M_NOWAIT);
+	if (filter == NULL)
+		return NULL;
+
+	/* Allocate the filter's memory */
+	filter->mem = (int *)malloc(BPF_MEMWORDS * sizeof(int),
+	    M_BPFJIT, M_NOWAIT);
+	if (filter->mem == NULL) {
+		free(filter, M_BPFJIT);
+		return NULL;
+	}
+
+	/* Create the binary */
+	if ((filter->func = bpf_jit_compile(fp, nins, filter->mem)) == NULL) {
+		free(filter->mem, M_BPFJIT);
+		free(filter, M_BPFJIT);
+		return NULL;
+	}
+
+	return filter;
+}
+
+void
+bpf_destroy_jit_filter(bpf_jit_filter *filter)
+{
+
+	free(filter->mem, M_BPFJIT);
+	free(filter->func, M_BPFJIT);
+	free(filter, M_BPFJIT);
+}
Index: if_mib.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_mib.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_mib.h -L sys/net/if_mib.h -u -r1.1.1.2 -r1.2
--- sys/net/if_mib.h
+++ sys/net/if_mib.h
@@ -26,7 +26,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_mib.h,v 1.7.2.1 2006/01/26 07:46:28 harti Exp $
+ * $FreeBSD: src/sys/net/if_mib.h,v 1.8 2006/01/04 12:57:09 harti Exp $
  */
 
 #ifndef _NET_IF_MIB_H
Index: pfkeyv2.h
===================================================================
RCS file: /home/cvs/src/sys/net/pfkeyv2.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/pfkeyv2.h -L sys/net/pfkeyv2.h -u -r1.1.1.1 -r1.2
--- sys/net/pfkeyv2.h
+++ sys/net/pfkeyv2.h
@@ -1,4 +1,4 @@
-/*	$FreeBSD: src/sys/net/pfkeyv2.h,v 1.14 2005/01/07 01:45:35 imp Exp $	*/
+/*	$FreeBSD: src/sys/net/pfkeyv2.h,v 1.16 2007/07/03 12:13:43 gnn Exp $	*/
 /*	$KAME: pfkeyv2.h,v 1.37 2003/09/06 05:15:43 itojun Exp $	*/
 
 /*-
@@ -325,8 +325,10 @@
 #define SADB_X_EALG_BLOWFISHCBC	7
 #define SADB_X_EALG_RIJNDAELCBC	12
 #define SADB_X_EALG_AES		12
+/* private allocations - based on RFC4312/IANA assignment */
+#define SADB_X_EALG_CAMELLIACBC		22
 /* private allocations should use 249-255 (RFC2407) */
-#define SADB_X_EALG_SKIPJACK	249	/*250*/ /* for FAST_IPSEC */
+#define SADB_X_EALG_SKIPJACK	249	/*250*/ /* for IPSEC */
 #define SADB_X_EALG_AESCTR	250	/*249*/ /* draft-ietf-ipsec-ciph-aes-ctr-03 */
 
 /* private allocations - based on RFC2407/IANA assignment */
--- sys/net/bpf_compat.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*-
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)bpf_compat.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/bpf_compat.h,v 1.12 2004/04/07 20:46:11 imp Exp $
- */
-
-#ifndef _NET_BPF_COMPAT_H_
-#define _NET_BPF_COMPAT_H_
-
-/*
- * Some hacks for compatibility across SunOS and 4.4BSD.  We emulate malloc
- * and free with mbuf clusters.  We store a pointer to the mbuf in the first
- * word of the mbuf and return 8 bytes past the start of data (for double
- * word alignment).  We cannot just use offsets because clusters are not at
- * a fixed offset from the associated mbuf.  Sorry for this kludge.
- */
-#define malloc(size, type, canwait)				\
-bpf_alloc(size, (canwait & M_NOWAIT) ? M_DONTWAIT : M_TRYWAIT)
-
-#define free(cp, type) m_free(*(struct mbuf **)(cp - 8))
-
-/* This mapping works for our purposes. */
-#define ERESTART EINTR
-
-#endif
Index: if_tun.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_tun.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_tun.c -L sys/net/if_tun.c -u -r1.1.1.1 -r1.2
--- sys/net/if_tun.c
+++ sys/net/if_tun.c
@@ -13,7 +13,7 @@
  * UCL. This driver is based much more on read/write/poll mode of
  * operation though.
  *
- * $FreeBSD: src/sys/net/if_tun.c,v 1.152.2.2 2005/08/25 05:01:20 rwatson Exp $
+ * $FreeBSD: src/sys/net/if_tun.c,v 1.163 2007/02/05 11:15:52 bms Exp $
  */
 
 #include "opt_atalk.h"
@@ -23,9 +23,9 @@
 #include "opt_mac.h"
 
 #include <sys/param.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
-#include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
@@ -45,6 +45,7 @@
 #include <sys/random.h>
 
 #include <net/if.h>
+#include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
@@ -56,6 +57,8 @@
 
 #include <sys/queue.h>
 
+#include <security/mac/mac_framework.h>
+
 /*
  * tun_list is protected by global tunmtx.  Other mutable fields are
  * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
@@ -103,13 +106,22 @@
 static struct mtx tunmtx;
 static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
 static int tundebug = 0;
+static int tundclone = 1;
 static struct clonedevs *tunclones;
 static TAILQ_HEAD(,tun_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
 
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
+    "IP tunnel software network interface.");
+SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0,
+    "Enable legacy devfs interface creation.");
+
+TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone);
+
 static void	tunclone(void *arg, struct ucred *cred, char *name,
 		    int namelen, struct cdev **dev);
-static void	tuncreate(struct cdev *dev);
+static void	tuncreate(const char *name, struct cdev *dev);
 static int	tunifioctl(struct ifnet *, u_long, caddr_t);
 static int	tuninit(struct ifnet *);
 static int	tunmodevent(module_t, int, void *);
@@ -117,12 +129,36 @@
 		    struct rtentry *rt);
 static void	tunstart(struct ifnet *);
 
+static int	tun_clone_create(struct if_clone *, int, caddr_t);
+static void	tun_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(tun, 0);
+
 static d_open_t		tunopen;
 static d_close_t	tunclose;
 static d_read_t		tunread;
 static d_write_t	tunwrite;
 static d_ioctl_t	tunioctl;
 static d_poll_t		tunpoll;
+static d_kqfilter_t	tunkqfilter;
+
+static int		tunkqread(struct knote *, long);
+static int		tunkqwrite(struct knote *, long);
+static void		tunkqdetach(struct knote *);
+
+static struct filterops tun_read_filterops = {
+	.f_isfd =	1,
+	.f_attach =	NULL,
+	.f_detach =	tunkqdetach,
+	.f_event =	tunkqread,
+};
+
+static struct filterops tun_write_filterops = {
+	.f_isfd =	1,
+	.f_attach =	NULL,
+	.f_detach =	tunkqdetach,
+	.f_event =	tunkqwrite,
+};
 
 static struct cdevsw tun_cdevsw = {
 	.d_version =	D_VERSION,
@@ -133,18 +169,49 @@
 	.d_write =	tunwrite,
 	.d_ioctl =	tunioctl,
 	.d_poll =	tunpoll,
+	.d_kqfilter =	tunkqfilter,
 	.d_name =	TUNNAME,
 };
 
+static int
+tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct cdev *dev;
+	int i;
+
+	/* find any existing device, or allocate new unit number */
+	i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
+	if (i) {
+		/* No preexisting struct cdev *, create one */
+		dev = make_dev(&tun_cdevsw, unit2minor(unit),
+		    UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit);
+		if (dev != NULL) {
+			dev_ref(dev);
+			dev->si_flags |= SI_CHEAPCLONE;
+		}
+	}
+	tuncreate(ifc->ifc_name, dev);
+
+	return (0);
+}
+
 static void
 tunclone(void *arg, struct ucred *cred, char *name, int namelen,
     struct cdev **dev)
 {
-	int u, i;
+	char devname[SPECNAMELEN + 1];
+	int u, i, append_unit;
 
 	if (*dev != NULL)
 		return;
 
+	/*
+	 * If tun cloning is enabled, only the superuser can create an
+	 * interface.
+	 */
+	if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
+		return;
+
 	if (strcmp(name, TUNNAME) == 0) {
 		u = -1;
 	} else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
@@ -152,17 +219,29 @@
 	if (u != -1 && u > IF_MAXUNIT)
 		return;	/* Unit number too high */
 
+	if (u == -1)
+		append_unit = 1;
+	else
+		append_unit = 0;
+
 	/* find any existing device, or allocate new unit number */
 	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
 	if (i) {
+		if (append_unit) {
+			namelen = snprintf(devname, sizeof(devname), "%s%d", name,
+			    u);
+			name = devname;
+		}
 		/* No preexisting struct cdev *, create one */
 		*dev = make_dev(&tun_cdevsw, unit2minor(u),
-		    UID_UUCP, GID_DIALER, 0600, "tun%d", u);
+		    UID_UUCP, GID_DIALER, 0600, "%s", name);
 		if (*dev != NULL) {
 			dev_ref(*dev);
 			(*dev)->si_flags |= SI_CHEAPCLONE;
 		}
 	}
+
+	if_clone_create(name, namelen, NULL);
 }
 
 static void
@@ -179,10 +258,22 @@
 	if_detach(TUN2IFP(tp));
 	if_free(TUN2IFP(tp));
 	destroy_dev(dev);
+	knlist_destroy(&tp->tun_rsel.si_note);
 	mtx_destroy(&tp->tun_mtx);
 	free(tp, M_TUN);
 }
 
+static void
+tun_clone_destroy(struct ifnet *ifp)
+{
+	struct tun_softc *tp = ifp->if_softc;
+
+	mtx_lock(&tunmtx);
+	TAILQ_REMOVE(&tunhead, tp, tun_list);
+	mtx_unlock(&tunmtx);
+	tun_destroy(tp);
+}
+
 static int
 tunmodevent(module_t mod, int type, void *data)
 {
@@ -196,8 +287,10 @@
 		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
 		if (tag == NULL)
 			return (ENOMEM);
+		if_clone_attach(&tun_cloner);
 		break;
 	case MOD_UNLOAD:
+		if_clone_detach(&tun_cloner);
 		EVENTHANDLER_DEREGISTER(dev_clone, tag);
 
 		mtx_lock(&tunmtx);
@@ -231,6 +324,7 @@
 	struct tun_softc *tp = ifp->if_softc;
 	struct mbuf *m;
 
+	TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
 	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
 		IFQ_LOCK(&ifp->if_snd);
 		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
@@ -252,11 +346,12 @@
 	} else
 		mtx_unlock(&tp->tun_mtx);
 	selwakeuppri(&tp->tun_rsel, PZERO + 1);
+	KNOTE_UNLOCKED(&tp->tun_rsel.si_note, 0);
 }
 
 /* XXX: should return an error code so it can fail. */
 static void
-tuncreate(struct cdev *dev)
+tuncreate(const char *name, struct cdev *dev)
 {
 	struct tun_softc *sc;
 	struct ifnet *ifp;
@@ -274,8 +369,8 @@
 	ifp = sc->tun_ifp = if_alloc(IFT_PPP);
 	if (ifp == NULL)
 		panic("%s%d: failed to if_alloc() interface.\n",
-		    TUNNAME, dev2unit(dev));
-	if_initname(ifp, TUNNAME, dev2unit(dev));
+		    name, dev2unit(dev));
+	if_initname(ifp, name, dev2unit(dev));
 	ifp->if_mtu = TUNMTU;
 	ifp->if_ioctl = tunifioctl;
 	ifp->if_output = tunoutput;
@@ -285,10 +380,13 @@
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
 	ifp->if_snd.ifq_drv_maxlen = 0;
 	IFQ_SET_READY(&ifp->if_snd);
+	knlist_init(&sc->tun_rsel.si_note, NULL, NULL, NULL, NULL);
 
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	dev->si_drv1 = sc;
+	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
+	    ifp->if_xname, minor(dev));
 }
 
 static int
@@ -303,7 +401,7 @@
 	 */
 	tp = dev->si_drv1;
 	if (!tp) {
-		tuncreate(dev);
+		tuncreate(TUNNAME, dev);
 		tp = dev->si_drv1;
 	}
 
@@ -360,22 +458,28 @@
 		splx(s);
 	}
 
+	/* Delete all addresses and routes which reference this interface. */
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		struct ifaddr *ifa;
 
 		s = splimp();
-		/* find internet addresses and delete routes */
-		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
-			if (ifa->ifa_addr->sa_family == AF_INET)
-				/* Unlocked read. */
+		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			/* deal w/IPv4 PtP destination; unlocked read */
+			if (ifa->ifa_addr->sa_family == AF_INET) {
 				rtinit(ifa, (int)RTM_DELETE,
 				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
+			} else {
+				rtinit(ifa, (int)RTM_DELETE, 0);
+			}
+		}
+		if_purgeaddrs(ifp);
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		splx(s);
 	}
 
 	funsetown(&tp->tun_sigio);
 	selwakeuppri(&tp->tun_rsel, PZERO + 1);
+	KNOTE_UNLOCKED(&tp->tun_rsel.si_note, 0);
 	TUNDEBUG (ifp, "closed\n");
 	return (0);
 }
@@ -393,29 +497,23 @@
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	getmicrotime(&ifp->if_lastchange);
 
-	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
-	     ifa = TAILQ_NEXT(ifa, ifa_link)) {
-		if (ifa->ifa_addr == NULL)
-			error = EFAULT;
-			/* XXX: Should maybe return straight off? */
-		else {
 #ifdef INET
-			if (ifa->ifa_addr->sa_family == AF_INET) {
-			    struct sockaddr_in *si;
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		if (ifa->ifa_addr->sa_family == AF_INET) {
+			struct sockaddr_in *si;
 
-			    si = (struct sockaddr_in *)ifa->ifa_addr;
-			    mtx_lock(&tp->tun_mtx);
-			    if (si->sin_addr.s_addr)
-				    tp->tun_flags |= TUN_IASET;
-
-			    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
-			    if (si && si->sin_addr.s_addr)
-				    tp->tun_flags |= TUN_DSTADDR;
-			    mtx_unlock(&tp->tun_mtx);
-			}
-#endif
+			si = (struct sockaddr_in *)ifa->ifa_addr;
+			mtx_lock(&tp->tun_mtx);
+			if (si->sin_addr.s_addr)
+				tp->tun_flags |= TUN_IASET;
+
+			si = (struct sockaddr_in *)ifa->ifa_dstaddr;
+			if (si && si->sin_addr.s_addr)
+				tp->tun_flags |= TUN_DSTADDR;
+			mtx_unlock(&tp->tun_mtx);
 		}
 	}
+#endif
 	return (error);
 }
 
@@ -509,7 +607,7 @@
 		dst->sa_family = af; 
 	}
 
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		af = dst->sa_family;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
 	}
@@ -575,9 +673,11 @@
 		tunp = (struct tuninfo *)data;
 		if (tunp->mtu < IF_MINMTU)
 			return (EINVAL);
-		if (TUN2IFP(tp)->if_mtu != tunp->mtu
-		&& (error = suser(td)) != 0)
-			return (error);
+		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
+			error = priv_check(td, PRIV_NET_SETIFMTU);
+			if (error)
+				return (error);
+		}
 		TUN2IFP(tp)->if_mtu = tunp->mtu;
 		TUN2IFP(tp)->if_type = tunp->type;
 		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
@@ -718,7 +818,7 @@
 			mtx_lock(&tp->tun_mtx);
 			tp->tun_flags |= TUN_RWAIT;
 			mtx_unlock(&tp->tun_mtx);
-			if((error = tsleep(tp, PCATCH | (PZERO + 1),
+			if ((error = tsleep(tp, PCATCH | (PZERO + 1),
 					"tunread", 0)) != 0) {
 				splx(s);
 				return (error);
@@ -768,7 +868,7 @@
 		return (EIO);
 	}
 
-	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0)) == NULL) {
+	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
 		ifp->if_ierrors++;
 		return (error);
 	}
@@ -863,3 +963,94 @@
 	splx(s);
 	return (revents);
 }
+
+/*
+ * tunkqfilter - support for the kevent() system call.
+ */
+static int
+tunkqfilter(struct cdev *dev, struct knote *kn)
+{
+	int			s;
+	struct tun_softc	*tp = dev->si_drv1;
+	struct ifnet	*ifp = TUN2IFP(tp);
+
+	s = splimp();
+	switch(kn->kn_filter) {
+	case EVFILT_READ:
+		TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
+		    ifp->if_xname, minor(dev));
+		kn->kn_fop = &tun_read_filterops;
+		break;
+
+	case EVFILT_WRITE:
+		TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
+		    ifp->if_xname, minor(dev));
+		kn->kn_fop = &tun_write_filterops;
+		break;
+	
+	default:
+		TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
+		    ifp->if_xname, minor(dev));
+		splx(s);
+		return(EINVAL);
+	}
+	splx(s);
+
+	kn->kn_hook = (caddr_t) dev;
+	knlist_add(&tp->tun_rsel.si_note, kn, 0);
+
+	return (0);
+}
+
+/*
+ * Return true if there is data in the interface queue.
+ */
+static int
+tunkqread(struct knote *kn, long hint)
+{
+	int			ret, s;
+	struct cdev		*dev = (struct cdev *)(kn->kn_hook);
+	struct tun_softc	*tp = dev->si_drv1;
+	struct ifnet	*ifp = TUN2IFP(tp);
+
+	s = splimp();
+	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
+		TUNDEBUG(ifp,
+		    "%s have data in the queue.  Len = %d, minor = %#x\n",
+		    ifp->if_xname, ifp->if_snd.ifq_len, minor(dev));
+		ret = 1;
+	} else {
+		TUNDEBUG(ifp,
+		    "%s waiting for data, minor = %#x\n", ifp->if_xname,
+		    minor(dev));
+		ret = 0;
+	}
+	splx(s);
+
+	return (ret);
+}
+
+/*
+ * Always can write, always return MTU in kn->data.
+ */
+static int
+tunkqwrite(struct knote *kn, long hint)
+{
+	int			s;
+	struct tun_softc	*tp = ((struct cdev *)kn->kn_hook)->si_drv1;
+	struct ifnet	*ifp = TUN2IFP(tp);
+
+	s = splimp();
+	kn->kn_data = ifp->if_mtu;
+	splx(s);
+
+	return (1);
+}
+
+static void
+tunkqdetach(struct knote *kn)
+{
+	struct tun_softc	*tp = ((struct cdev *)kn->kn_hook)->si_drv1;
+
+	knlist_remove(&tp->tun_rsel.si_note, kn, 0);
+}
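
With the tunkqfilter/tunkqread/tunkqwrite entry points added above, a userland process can now wait for tun traffic via kqueue(2)/kevent(2) rather than select(2) or poll(2). A self-contained sketch follows; the device path and buffer size are arbitrary examples.

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[2048];
	struct kevent ev;
	ssize_t n;
	int fd, kq;

	fd = open("/dev/tun0", O_RDWR);		/* example device */
	if (fd == -1)
		err(1, "open");
	kq = kqueue();
	if (kq == -1)
		err(1, "kqueue");

	/* Register interest in readability of the tun descriptor. */
	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");

	/* Block until tunkqread() reports queued packets, then read one. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1) {
		n = read(fd, buf, sizeof(buf));
		printf("read %zd bytes (%ld packets reported queued)\n",
		    n, (long)ev.data);
	}
	return (0);
}
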
Index: if_atmsubr.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_atmsubr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_atmsubr.c -L sys/net/if_atmsubr.c -u -r1.1.1.1 -r1.2
--- sys/net/if_atmsubr.c
+++ sys/net/if_atmsubr.c
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/net/if_atmsubr.c,v 1.37.2.3 2005/08/25 05:01:19 rwatson Exp $");
+__FBSDID("$FreeBSD: src/sys/net/if_atmsubr.c,v 1.45 2006/12/01 22:45:43 rwatson Exp $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -46,7 +46,6 @@
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
-#include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
@@ -71,6 +70,8 @@
 #include <netnatm/natm.h>
 #endif
 
+#include <security/mac/mac_framework.h>
+
 /*
  * Netgraph interface functions.
  * These need not be protected by a lock, because ng_atm nodes are persistent.
@@ -189,14 +190,8 @@
 			break;
 			
 		default:
-#if (defined(__FreeBSD__) && __FreeBSD_version >= 501113) || \
-    defined(__NetBSD__) || defined(__OpenBSD__)
 			printf("%s: can't handle af%d\n", ifp->if_xname, 
 			    dst->sa_family);
-#elif defined(__FreeBSD__) || defined(__bsdi__)
-			printf("%s%d: can't handle af%d\n", ifp->if_name, 
-			    ifp->if_unit, dst->sa_family);
-#endif
 			senderr(EAFNOSUPPORT);
 		}
 
@@ -315,17 +310,9 @@
 				return; /* failed */
 			alc = mtod(m, struct atmllc *);
 			if (bcmp(alc, ATMLLC_HDR, 6)) {
-#if (defined(__FreeBSD__) && __FreeBSD_version >= 501113) || \
-    defined(__NetBSD__) || defined(__OpenBSD__)
 				printf("%s: recv'd invalid LLC/SNAP frame "
 				    "[vp=%d,vc=%d]\n", ifp->if_xname,
 				    ATM_PH_VPI(ah), ATM_PH_VCI(ah));
-#elif defined(__FreeBSD__) || defined(__bsdi__)
-				printf("%s%d: recv'd invalid LLC/SNAP frame "
-				    "[vp=%d,vc=%d]\n", ifp->if_name,
-				    ifp->if_unit, ATM_PH_VPI(ah),
-				    ATM_PH_VCI(ah));
-#endif
 				m_freem(m);
 				return;
 			}
@@ -380,16 +367,9 @@
 #endif
 	ifp->if_snd.ifq_maxlen = 50;	/* dummy */
 
-#if defined(__NetBSD__) || defined(__OpenBSD__)
-	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
-#elif defined(__FreeBSD__) && (__FreeBSD__ > 2)
-	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; 
-	    ifa = TAILQ_NEXT(ifa, ifa_link))
-#elif defined(__FreeBSD__) || defined(__bsdi__)
-	for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next) 
-#endif
-		if ((sdl = (struct sockaddr_dl *)ifa->ifa_addr) &&
-		    sdl->sdl_family == AF_LINK) {
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+		if (ifa->ifa_addr->sa_family == AF_LINK) {
+			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 			sdl->sdl_type = IFT_ATM;
 			sdl->sdl_alen = ifp->if_addrlen;
 #ifdef notyet /* if using ATMARP, store hardware address using the next line */
Index: if_faith.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_faith.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_faith.c -L sys/net/if_faith.c -u -r1.1.1.1 -r1.2
--- sys/net/if_faith.c
+++ sys/net/if_faith.c
@@ -28,7 +28,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_faith.c,v 1.36.2.1 2005/08/25 05:01:19 rwatson Exp $
+ * $FreeBSD: src/sys/net/if_faith.c,v 1.42 2006/08/04 21:27:37 brooks Exp $
  */
 /*
  * derived from
@@ -78,13 +78,10 @@
 #include <netinet6/ip6_var.h>
 #endif
 
-#include <net/net_osdep.h>
-
 #define FAITHNAME	"faith"
 
 struct faith_softc {
 	struct ifnet *sc_ifp;
-	LIST_ENTRY(faith_softc) sc_list;
 };
 
 static int faithioctl(struct ifnet *, u_long, caddr_t);
@@ -97,13 +94,10 @@
 
 static int faithmodevent(module_t, int, void *);
 
-static struct mtx faith_mtx;
 static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface");
-static LIST_HEAD(, faith_softc) faith_softc_list;
 
-static int	faith_clone_create(struct if_clone *, int);
+static int	faith_clone_create(struct if_clone *, int, caddr_t);
 static void	faith_clone_destroy(struct ifnet *);
-static void	faith_destroy(struct faith_softc *);
 
 IFC_SIMPLE_DECLARE(faith, 0);
 
@@ -115,12 +109,9 @@
 	int type;
 	void *data;
 {
-	struct faith_softc *sc;
 
 	switch (type) {
 	case MOD_LOAD:
-		mtx_init(&faith_mtx, "faith_mtx", NULL, MTX_DEF);
-		LIST_INIT(&faith_softc_list);
 		if_clone_attach(&faith_cloner);
 
 #ifdef INET6
@@ -134,16 +125,6 @@
 #endif
 
 		if_clone_detach(&faith_cloner);
-
-		mtx_lock(&faith_mtx);
-		while ((sc = LIST_FIRST(&faith_softc_list)) != NULL) {
-			LIST_REMOVE(sc, sc_list);
-			mtx_unlock(&faith_mtx);
-			faith_destroy(sc);
-			mtx_lock(&faith_mtx);
-		}
-		mtx_unlock(&faith_mtx);
-		mtx_destroy(&faith_mtx);
 		break;
 	default:
 		return EOPNOTSUPP;
@@ -161,9 +142,10 @@
 MODULE_VERSION(if_faith, 1);
 
 static int
-faith_clone_create(ifc, unit)
+faith_clone_create(ifc, unit, params)
 	struct if_clone *ifc;
 	int unit;
+	caddr_t params;
 {
 	struct ifnet *ifp;
 	struct faith_softc *sc;
@@ -188,33 +170,19 @@
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
-	mtx_lock(&faith_mtx);
-	LIST_INSERT_HEAD(&faith_softc_list, sc, sc_list);
-	mtx_unlock(&faith_mtx);
 	return (0);
 }
 
 static void
-faith_destroy(struct faith_softc *sc)
-{
-
-	bpfdetach(sc->sc_ifp);
-	if_detach(sc->sc_ifp);
-	if_free(sc->sc_ifp);
-	free(sc, M_FAITH);
-}
-
-static void
 faith_clone_destroy(ifp)
 	struct ifnet *ifp;
 {
 	struct faith_softc *sc = ifp->if_softc;
 
-	mtx_lock(&faith_mtx);
-	LIST_REMOVE(sc, sc_list);
-	mtx_unlock(&faith_mtx);
-
-	faith_destroy(sc);
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+	free(sc, M_FAITH);
 }
 
 int
@@ -235,7 +203,7 @@
 		dst->sa_family = af;
 	}
 
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		af = dst->sa_family;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
Index: raw_cb.c
===================================================================
RCS file: /home/cvs/src/sys/net/raw_cb.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/raw_cb.c -L sys/net/raw_cb.c -u -r1.1.1.1 -r1.2
--- sys/net/raw_cb.c
+++ sys/net/raw_cb.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)raw_cb.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/raw_cb.c,v 1.32 2005/01/24 22:56:09 rwatson Exp $
+ * $FreeBSD: src/sys/net/raw_cb.c,v 1.34 2006/06/02 08:27:15 rwatson Exp $
  */
 
 #include <sys/param.h>
@@ -98,10 +98,9 @@
 {
 	struct socket *so = rp->rcb_socket;
 
-	ACCEPT_LOCK();
-	SOCK_LOCK(so);
-	so->so_pcb = 0;
-	sotryfree(so);
+	KASSERT(so->so_pcb == rp, ("raw_detach: so_pcb != rp"));
+
+	so->so_pcb = NULL;
 	mtx_lock(&rawcb_mtx);
 	LIST_REMOVE(rp, list);
 	mtx_unlock(&rawcb_mtx);
@@ -114,7 +113,7 @@
 }
 
 /*
- * Disconnect and possibly release resources.
+ * Disconnect raw socket.
  */
 void
 raw_disconnect(rp)
@@ -126,8 +125,6 @@
 		m_freem(dtom(rp->rcb_faddr));
 	rp->rcb_faddr = 0;
 #endif
-	if (rp->rcb_socket->so_state & SS_NOFDREF)
-		raw_detach(rp);
 }
 
 #ifdef notdef
Index: if_atm.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_atm.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_atm.h -L sys/net/if_atm.h -u -r1.1.1.1 -r1.2
--- sys/net/if_atm.h
+++ sys/net/if_atm.h
@@ -1,5 +1,5 @@
 /*      $NetBSD: if_atm.h,v 1.7 1996/11/09 23:02:27 chuck Exp $       */
-/* $FreeBSD: src/sys/net/if_atm.h,v 1.23.2.1 2005/08/25 05:01:19 rwatson Exp $ */
+/* $FreeBSD: src/sys/net/if_atm.h,v 1.24 2005/08/09 10:19:58 rwatson Exp $ */
 
 /*-
  *
Index: ppp_tty.c
===================================================================
RCS file: /home/cvs/src/sys/net/ppp_tty.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/ppp_tty.c -L sys/net/ppp_tty.c -u -r1.1.1.1 -r1.2
--- sys/net/ppp_tty.c
+++ sys/net/ppp_tty.c
@@ -71,7 +71,7 @@
  * Paul Mackerras (paulus at cs.anu.edu.au).
  */
 
-/* $FreeBSD: src/sys/net/ppp_tty.c,v 1.66.2.2 2005/08/25 05:01:20 rwatson Exp $ */
+/* $FreeBSD: src/sys/net/ppp_tty.c,v 1.72 2006/12/05 18:54:21 ume Exp $ */
 
 #include "opt_ppp.h"		/* XXX for ppp_defs.h */
 
@@ -79,6 +79,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
@@ -179,7 +180,8 @@
     register struct ppp_softc *sc;
     int error, s;
 
-    if ((error = suser(td)) != 0)
+    error = priv_check(td, PRIV_NET_PPP);
+    if (error)
 	return (error);
 
     s = spltty();
@@ -275,7 +277,7 @@
 	sc->sc_m = NULL;
     }
     if (sc->sc_flags & SC_TIMEOUT) {
-	untimeout(ppp_timeout, (void *) sc, sc->sc_ch);
+	callout_stop(&sc->sc_timo_ch);
 	sc->sc_flags &= ~SC_TIMEOUT;
     }
     splx(s);
@@ -384,7 +386,7 @@
 	return (EMSGSIZE);
 
     s = spltty();
-    if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0)) == NULL) {
+    if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
 	splx(s);
 	return (ENOBUFS);
     }
@@ -423,7 +425,8 @@
     error = 0;
     switch (cmd) {
     case PPPIOCSASYNCMAP:
-	if ((error = suser(td)) != 0)
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
 	    break;
 	sc->sc_asyncmap[0] = *(u_int *)data;
 	break;
@@ -433,7 +436,8 @@
 	break;
 
     case PPPIOCSRASYNCMAP:
-	if ((error = suser(td)) != 0)
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
 	    break;
 	sc->sc_rasyncmap = *(u_int *)data;
 	break;
@@ -443,7 +447,8 @@
 	break;
 
     case PPPIOCSXASYNCMAP:
-	if ((error = suser(td)) != 0)
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
 	    break;
 	s = spltty();
 	bcopy(data, sc->sc_asyncmap, sizeof(sc->sc_asyncmap));
@@ -693,7 +698,7 @@
      * drained the t_outq.
      */
     if (!idle && (sc->sc_flags & SC_TIMEOUT) == 0) {
-	sc->sc_ch = timeout(ppp_timeout, (void *) sc, 1);
+	callout_reset(&sc->sc_timo_ch, 1, ppp_timeout, sc);
 	sc->sc_flags |= SC_TIMEOUT;
     }
 
@@ -735,8 +740,7 @@
      * Call output process whether or not there is any output.
      * We are being called in lieu of ttstart and must do what it would.
      */
-    if (tp->t_oproc != NULL)
-	(*tp->t_oproc)(tp);
+    tt_oproc(tp);
 
     /*
      * If the transmit queue has drained and the tty has not hung up
@@ -844,14 +848,13 @@
 	if (c == tp->t_cc[VSTOP] && tp->t_cc[VSTOP] != _POSIX_VDISABLE) {
 	    if ((tp->t_state & TS_TTSTOP) == 0) {
 		tp->t_state |= TS_TTSTOP;
-		tp->t_stop(tp, 0);
+		tt_stop(tp, 0);
 	    }
 	    return 0;
 	}
 	if (c == tp->t_cc[VSTART] && tp->t_cc[VSTART] != _POSIX_VDISABLE) {
 	    tp->t_state &= ~TS_TTSTOP;
-	    if (tp->t_oproc != NULL)
-		(*tp->t_oproc)(tp);
+	    tt_oproc(tp);
 	    return 0;
 	}
     }
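
Besides the suser() to priv_check() conversion, ppp_tty.c now drives its transmit-stall timer through the callout(9) API instead of the legacy timeout()/untimeout() pair: the handle lives in the softc (sc_timo_ch) and no returned cookie has to be remembered. A hedged sketch of the general pattern, with a hypothetical softc, flag and handler:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>

/* Hypothetical driver state carrying its own callout handle. */
struct demo_softc {
	struct callout	sc_timo_ch;
	int		sc_flags;
#define	DEMO_TIMEOUT	0x1
};

static void
demo_timeout(void *arg)
{
	struct demo_softc *sc = arg;

	sc->sc_flags &= ~DEMO_TIMEOUT;
	/* ... kick output along here ... */
}

static void
demo_attach(struct demo_softc *sc)
{

	callout_init(&sc->sc_timo_ch, 0);	/* 0: Giant-locked, not MPSAFE */
}

static void
demo_arm(struct demo_softc *sc)
{

	if ((sc->sc_flags & DEMO_TIMEOUT) == 0) {
		callout_reset(&sc->sc_timo_ch, 1, demo_timeout, sc); /* 1 tick */
		sc->sc_flags |= DEMO_TIMEOUT;
	}
}

static void
demo_disarm(struct demo_softc *sc)
{

	if (sc->sc_flags & DEMO_TIMEOUT) {
		callout_stop(&sc->sc_timo_ch);
		sc->sc_flags &= ~DEMO_TIMEOUT;
	}
}
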
Index: rtsock.c
===================================================================
RCS file: /home/cvs/src/sys/net/rtsock.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/rtsock.c -L sys/net/rtsock.c -u -r1.2 -r1.3
--- sys/net/rtsock.c
+++ sys/net/rtsock.c
@@ -27,15 +27,16 @@
  * SUCH DAMAGE.
  *
  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
- * $FreeBSD: src/sys/net/rtsock.c,v 1.123.2.7 2006/04/04 20:07:23 andre Exp $
+ * $FreeBSD: src/sys/net/rtsock.c,v 1.143 2007/09/08 19:28:45 cognet Exp $
  */
-
+#include "opt_sctp.h"
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
@@ -51,6 +52,10 @@
 
 #include <netinet/in.h>
 
+#ifdef SCTP
+extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
+#endif /* SCTP */
+
 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 
 /* NB: these are not modified */
@@ -137,11 +142,18 @@
  * It really doesn't make any sense at all for this code to share much
  * with raw_usrreq.c, since its functionality is so restricted.  XXX
  */
-static int
+static void
 rts_abort(struct socket *so)
 {
 
-	return (raw_usrreqs.pru_abort(so));
+	raw_usrreqs.pru_abort(so);
+}
+
+static void
+rts_close(struct socket *so)
+{
+
+	raw_usrreqs.pru_close(so);
 }
 
 /* pru_accept is EOPNOTSUPP */
@@ -152,8 +164,8 @@
 	struct rawcb *rp;
 	int s, error;
 
-	if (sotorawcb(so) != NULL)
-		return EISCONN;	/* XXX panic? */
+	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
+
 	/* XXX */
 	MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
 	if (rp == NULL)
@@ -214,32 +226,28 @@
 /* pru_connect2 is EOPNOTSUPP */
 /* pru_control is EOPNOTSUPP */
 
-static int
+static void
 rts_detach(struct socket *so)
 {
 	struct rawcb *rp = sotorawcb(so);
-	int s, error;
 
-	s = splnet();
-	if (rp != NULL) {
-		RTSOCK_LOCK();
-		switch(rp->rcb_proto.sp_protocol) {
-		case AF_INET:
-			route_cb.ip_count--;
-			break;
-		case AF_INET6:
-			route_cb.ip6_count--;
-			break;
-		case AF_IPX:
-			route_cb.ipx_count--;
-			break;
-		}
-		route_cb.any_count--;
-		RTSOCK_UNLOCK();
+	KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
+
+	RTSOCK_LOCK();
+	switch(rp->rcb_proto.sp_protocol) {
+	case AF_INET:
+		route_cb.ip_count--;
+		break;
+	case AF_INET6:
+		route_cb.ip6_count--;
+		break;
+	case AF_IPX:
+		route_cb.ipx_count--;
+		break;
 	}
-	error = raw_usrreqs.pru_detach(so);
-	splx(s);
-	return error;
+	route_cb.any_count--;
+	RTSOCK_UNLOCK();
+	raw_usrreqs.pru_detach(so);
 }
 
 static int
@@ -296,6 +304,7 @@
 	.pru_send =		rts_send,
 	.pru_shutdown =		rts_shutdown,
 	.pru_sockaddr =		rts_sockaddr,
+	.pru_close =		rts_close,
 };
 
 /*ARGSUSED*/
@@ -309,7 +318,6 @@
 	struct rt_addrinfo info;
 	int len, error = 0;
 	struct ifnet *ifp = NULL;
-	struct ifaddr *ifa = NULL;
 	struct sockaddr_in jail;
 
 #define senderr(e) { error = e; goto flush;}
@@ -364,8 +372,11 @@
 	 * Verify that the caller has the appropriate privilege; RTM_GET
 	 * is the only operation the non-superuser is allowed.
 	 */
-	if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0)
-		senderr(error);
+	if (rtm->rtm_type != RTM_GET) {
+		error = priv_check(curthread, PRIV_NET_ROUTE);
+		if (error)
+			senderr(error);
+	}
 
 	switch (rtm->rtm_type) {
 		struct rtentry *saved_nrt;
@@ -413,6 +424,25 @@
 		RT_ADDREF(rt);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 
+		/* 
+		 * Fix for PR: 82974
+		 *
+		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
+		 * returns a perfect match in case a netmask is
+		 * specified.  For host routes only a longest prefix
+		 * match is returned so it is necessary to compare the
+		 * existence of the netmask.  If both have a netmask
+		 * rnh_lookup() did a perfect match and if none of them
+		 * have a netmask both are host routes which is also a
+		 * perfect match.
+		 */
+
+		if (rtm->rtm_type != RTM_GET && 
+		    (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
+			RT_UNLOCK(rt);
+			senderr(ESRCH);
+		}
+
 		switch(rtm->rtm_type) {
 
 		case RTM_GET:
@@ -426,7 +456,7 @@
 				ifp = rt->rt_ifp;
 				if (ifp) {
 					info.rti_info[RTAX_IFP] =
-					    ifaddr_byindex(ifp->if_index)->ifa_addr;
+					    ifp->if_addr->ifa_addr;
 					if (jailed(so->so_cred)) {
 						bzero(&jail, sizeof(jail));
 						jail.sin_family = PF_INET;
@@ -484,26 +514,28 @@
 					senderr(error);
 				RT_LOCK(rt);
 			}
-			if (info.rti_info[RTAX_GATEWAY] != NULL &&
-			    (error = rt_setgate(rt, rt_key(rt),
-					info.rti_info[RTAX_GATEWAY])) != 0) {
-				RT_UNLOCK(rt);
-				senderr(error);
+			if (info.rti_ifa != NULL &&
+			    info.rti_ifa != rt->rt_ifa &&
+			    rt->rt_ifa != NULL &&
+			    rt->rt_ifa->ifa_rtrequest != NULL) {
+				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
+				    &info);
+				IFAFREE(rt->rt_ifa);
 			}
-			if ((ifa = info.rti_ifa) != NULL) {
-				struct ifaddr *oifa = rt->rt_ifa;
-				if (oifa != ifa) {
-					if (oifa) {
-						if (oifa->ifa_rtrequest)
-							oifa->ifa_rtrequest(
-								RTM_DELETE, rt,
-								&info);
-						IFAFREE(oifa);
-					}
-				        IFAREF(ifa);
-				        rt->rt_ifa = ifa;
-				        rt->rt_ifp = info.rti_ifp;
+			if (info.rti_info[RTAX_GATEWAY] != NULL) {
+				if ((error = rt_setgate(rt, rt_key(rt),
+					info.rti_info[RTAX_GATEWAY])) != 0) {
+					RT_UNLOCK(rt);
+					senderr(error);
 				}
+				if (!(rt->rt_flags & RTF_LLINFO))
+					rt->rt_flags |= RTF_GATEWAY;
+			}
+			if (info.rti_ifa != NULL &&
+			    info.rti_ifa != rt->rt_ifa) {
+				IFAREF(info.rti_ifa);
+				rt->rt_ifa = info.rti_ifa;
+				rt->rt_ifp = info.rti_ifp;
 			}
 			/* Allow some flags to be toggled on change. */
 			if (rtm->rtm_fmask & RTF_FMASK)
@@ -590,7 +622,10 @@
 	 * of tcp hostcache. The rest is ignored.
 	 */
 	metric(RTV_MTU, rmx_mtu);
-	metric(RTV_EXPIRE, rmx_expire);
+	/* Userland -> kernel timebase conversion. */
+	if (which & RTV_EXPIRE)
+		out->rmx_expire = in->rmx_expire ?
+		    in->rmx_expire - time_second + time_uptime : 0;
 #undef metric
 }
 
@@ -600,7 +635,9 @@
 #define metric(e) out->e = in->e;
 	bzero(out, sizeof(*out));
 	metric(rmx_mtu);
-	metric(rmx_expire);
+	/* Kernel -> userland timebase conversion. */
+	out->rmx_expire = in->rmx_expire ?
+	    in->rmx_expire - time_uptime + time_second : 0;
 #undef metric
 }
 
@@ -851,7 +888,14 @@
 
 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
 		("unexpected cmd %u", cmd));
-
+#ifdef SCTP
+	/*
+	 * notify the SCTP stack
+	 * this will only get called when an address is added/deleted
+	 * XXX pass the ifaddr struct instead of ifa->ifa_addr...
+	 */
+	sctp_addr_change(ifa, cmd);
+#endif /* SCTP */
 	if (route_cb.any_count == 0)
 		return;
 	for (pass = 1; pass < 3; pass++) {
@@ -862,8 +906,7 @@
 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 
 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
-			info.rti_info[RTAX_IFP] =
-			    ifaddr_byindex(ifp->if_index)->ifa_addr;
+			info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			if ((m = rt_msg1(ncmd, &info)) == NULL)
@@ -913,8 +956,7 @@
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
-	info.rti_info[RTAX_IFP] =
-	    ifp ? ifaddr_byindex(ifp->if_index)->ifa_addr : NULL;
+	info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
 	/*
 	 * If a link-layer address is present, present it as a ``gateway''
 	 * (similarly to how ARP entries, e.g., are presented).
@@ -924,6 +966,8 @@
 	if (m == NULL)
 		return;
 	ifmam = mtod(m, struct ifma_msghdr *);
+	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
+	    __func__));
 	ifmam->ifmam_index = ifp->if_index;
 	ifmam->ifmam_addrs = info.rti_addrs;
 	rt_dispatch(m, ifma->ifma_addr);
@@ -1047,8 +1091,7 @@
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
 	if (rt->rt_ifp) {
-		info.rti_info[RTAX_IFP] =
-		    ifaddr_byindex(rt->rt_ifp->if_index)->ifa_addr;
+		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
@@ -1082,7 +1125,7 @@
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
-		ifa = ifaddr_byindex(ifp->if_index);
+		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
 		info.rti_info[RTAX_IFP] = NULL;
@@ -1143,7 +1186,7 @@
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
-		ifa = ifaddr_byindex(ifp->if_index);
+		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
@@ -1243,7 +1286,7 @@
  * Definitions of protocols supported in the ROUTE domain.
  */
 
-extern struct domain routedomain;		/* or at least forward */
+static struct domain routedomain;		/* or at least forward */
 
 static struct protosw routesw[] = {
 {
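
Note on the rt_metrics hunks above: rmx_expire is now rebased between the userland wall-clock timebase (time_second) and the kernel uptime timebase (time_uptime), with 0 still meaning "never expires". A minimal standalone sketch of the same arithmetic; the helper names and sample values are illustrative only, not kernel code:

#include <stdio.h>
#include <time.h>

/*
 * Sketch of the rmx_expire rebasing done in rt_setmetrics()/rt_getmetrics().
 * wall_now stands in for time_second and uptime_now for time_uptime; both
 * names are illustrative only.
 */
static long
expire_wall_to_uptime(long expire, long wall_now, long uptime_now)
{
	/* 0 means "never expires" and passes through unchanged. */
	return (expire ? expire - wall_now + uptime_now : 0);
}

static long
expire_uptime_to_wall(long expire, long wall_now, long uptime_now)
{
	return (expire ? expire - uptime_now + wall_now : 0);
}

int
main(void)
{
	long wall_now = (long)time(NULL);
	long uptime_now = 1000;			/* pretend 1000 s of uptime */
	long user_expire = wall_now + 600;	/* route expires in 10 minutes */
	long kern_expire = expire_wall_to_uptime(user_expire, wall_now, uptime_now);

	printf("kernel rmx_expire (uptime base): %ld\n", kern_expire);
	printf("back to wall clock: %ld\n",
	    expire_uptime_to_wall(kern_expire, wall_now, uptime_now));
	return (0);
}
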
Index: bpf.c
===================================================================
RCS file: /home/cvs/src/sys/net/bpf.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/bpf.c -L sys/net/bpf.c -u -r1.1.1.1 -r1.2
--- sys/net/bpf.c
+++ sys/net/bpf.c
@@ -33,7 +33,7 @@
  *
  *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
  *
- * $FreeBSD: src/sys/net/bpf.c,v 1.153.2.6 2005/10/01 18:43:16 csjp Exp $
+ * $FreeBSD: src/sys/net/bpf.c,v 1.181.2.1 2007/10/20 15:09:24 csjp Exp $
  */
 
 #include "opt_bpf.h"
@@ -45,10 +45,10 @@
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/time.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/signalvar.h>
 #include <sys/filio.h>
@@ -65,6 +65,9 @@
 
 #include <net/if.h>
 #include <net/bpf.h>
+#ifdef BPF_JITTER
+#include <net/bpf_jitter.h>
+#endif
 #include <net/bpfdesc.h>
 
 #include <netinet/in.h>
@@ -72,12 +75,18 @@
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
+#include <net80211/ieee80211_freebsd.h>
+
+#include <security/mac/mac_framework.h>
+
 static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
 
 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
 
 #define PRINET  26			/* interruptible */
 
+#define	M_SKIP_BPF	M_SKIP_FIREWALL
+
 /*
  * bpf_iflist is a list of BPF interface structures, each corresponding to a
  * specific DLT.  The same network interface might have several BPF interface
@@ -88,19 +97,20 @@
 static struct mtx	bpf_mtx;		/* bpf global lock */
 static int		bpf_bpfd_cnt;
 
-static int	bpf_allocbufs(struct bpf_d *);
-static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
-static void	bpf_detachd(struct bpf_d *d);
+static void	bpf_allocbufs(struct bpf_d *);
+static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
+static void	bpf_detachd(struct bpf_d *);
 static void	bpf_freed(struct bpf_d *);
 static void	bpf_mcopy(const void *, void *, size_t);
-static int	bpf_movein(struct uio *, int, int,
-		    struct mbuf **, struct sockaddr *, struct bpf_insn *);
+static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
+		    struct sockaddr *, int *, struct bpf_insn *);
 static int	bpf_setif(struct bpf_d *, struct ifreq *);
 static void	bpf_timed_out(void *);
 static __inline void
 		bpf_wakeup(struct bpf_d *);
 static void	catchpacket(struct bpf_d *, u_char *, u_int,
-		    u_int, void (*)(const void *, void *, size_t));
+		    u_int, void (*)(const void *, void *, size_t),
+		    struct timeval *);
 static void	reset_d(struct bpf_d *);
 static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
@@ -111,16 +121,13 @@
 static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
 static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
 
-/*
- * The default read buffer size is patchable.
- */
 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
 static int bpf_bufsize = 4096;
 SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
-    &bpf_bufsize, 0, "");
+    &bpf_bufsize, 0, "Default bpf buffer size");
 static int bpf_maxbufsize = BPF_MAXBUFSIZE;
 SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
-    &bpf_maxbufsize, 0, "");
+    &bpf_maxbufsize, 0, "Maximum bpf buffer size");
 static int bpf_maxinsns = BPF_MAXINSNS;
 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
     &bpf_maxinsns, 0, "Maximum bpf program instructions");
@@ -137,7 +144,6 @@
 
 static struct cdevsw bpf_cdevsw = {
 	.d_version =	D_VERSION,
-	.d_flags =	D_NEEDGIANT,
 	.d_open =	bpfopen,
 	.d_close =	bpfclose,
 	.d_read =	bpfread,
@@ -152,14 +158,11 @@
 	{ 1, NULL, filt_bpfdetach, filt_bpfread };
 
 static int
-bpf_movein(uio, linktype, mtu, mp, sockp, wfilter)
-	struct uio *uio;
-	int linktype;
-	int mtu;
-	struct mbuf **mp;
-	struct sockaddr *sockp;
-	struct bpf_insn *wfilter;
+bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
+    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
 {
+	const struct ieee80211_bpf_params *p;
+	struct ether_header *eh;
 	struct mbuf *m;
 	int error;
 	int len;
@@ -222,13 +225,24 @@
 		hlen = 4;	/* This should match PPP_HDRLEN */
 		break;
 
+	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
+		sockp->sa_family = AF_IEEE80211;
+		hlen = 0;
+		break;
+
+	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
+		sockp->sa_family = AF_IEEE80211;
+		sockp->sa_len = 12;	/* XXX != 0 */
+		hlen = sizeof(struct ieee80211_bpf_params);
+		break;
+
 	default:
 		return (EIO);
 	}
 
 	len = uio->uio_resid;
 
-	if (len - hlen > mtu)
+	if (len - hlen > ifp->if_mtu)
 		return (EMSGSIZE);
 
 	if ((unsigned)len > MCLBYTES)
@@ -260,19 +274,44 @@
 		goto bad;
 	}
 
+	/* Check for multicast destination */
+	switch (linktype) {
+	case DLT_EN10MB:
+		eh = mtod(m, struct ether_header *);
+		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
+			    ETHER_ADDR_LEN) == 0)
+				m->m_flags |= M_BCAST;
+			else
+				m->m_flags |= M_MCAST;
+		}
+		break;
+	}
+
 	/*
 	 * Make room for link header, and copy it to sockaddr
 	 */
 	if (hlen != 0) {
+		if (sockp->sa_family == AF_IEEE80211) {
+			/*
+			 * Collect true length from the parameter header
+			 * NB: sockp is known to be zero'd so if we do a
+			 *     short copy unspecified parameters will be
+			 *     zero.
+			 * NB: packet may not be aligned after stripping
+			 *     bpf params
+			 * XXX check ibp_vers
+			 */
+			p = mtod(m, const struct ieee80211_bpf_params *);
+			hlen = p->ibp_len;
+			if (hlen > sizeof(sockp->sa_data)) {
+				error = EINVAL;
+				goto bad;
+			}
+		}
 		bcopy(m->m_data, sockp->sa_data, hlen);
-		m->m_pkthdr.len -= hlen;
-		m->m_len -= hlen;
-#if BSD >= 199103
-		m->m_data += hlen; /* XXX */
-#else
-		m->m_off += hlen;
-#endif
 	}
+	*hdrlen = hlen;
 
 	return (0);
 bad:
@@ -284,9 +323,7 @@
  * Attach file to the bpf interface, i.e. make d listen on bp.
  */
 static void
-bpf_attachd(d, bp)
-	struct bpf_d *d;
-	struct bpf_if *bp;
+bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
 	/*
 	 * Point d at bp, and add d to the interface's list of listeners.
@@ -298,7 +335,6 @@
 	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 
 	bpf_bpfd_cnt++;
-	*bp->bif_driverp = bp;
 	BPFIF_UNLOCK(bp);
 }
 
@@ -306,8 +342,7 @@
  * Detach a file from its interface.
  */
 static void
-bpf_detachd(d)
-	struct bpf_d *d;
+bpf_detachd(struct bpf_d *d)
 {
 	int error;
 	struct bpf_if *bp;
@@ -324,12 +359,6 @@
 	LIST_REMOVE(d, bd_next);
 
 	bpf_bpfd_cnt--;
-	/*
-	 * Let the driver know that there are no more listeners.
-	 */
-	if (LIST_EMPTY(&bp->bif_dlist))
-		*bp->bif_driverp = NULL;
-
 	d->bd_bif = NULL;
 	BPFD_UNLOCK(d);
 	BPFIF_UNLOCK(bp);
@@ -360,11 +389,7 @@
  */
 /* ARGSUSED */
 static	int
-bpfopen(dev, flags, fmt, td)
-	struct cdev *dev;
-	int flags;
-	int fmt;
-	struct thread *td;
+bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct bpf_d *d;
 
@@ -388,15 +413,14 @@
 	dev->si_drv1 = d;
 	d->bd_bufsize = bpf_bufsize;
 	d->bd_sig = SIGIO;
-	d->bd_seesent = 1;
+	d->bd_direction = BPF_D_INOUT;
 	d->bd_pid = td->td_proc->p_pid;
-	strlcpy(d->bd_pcomm, td->td_proc->p_comm, MAXCOMLEN);
 #ifdef MAC
 	mac_init_bpfdesc(d);
 	mac_create_bpfdesc(td->td_ucred, d);
 #endif
 	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
-	callout_init(&d->bd_callout, NET_CALLOUT_MPSAFE);
+	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
 	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);
 
 	return (0);
@@ -408,11 +432,7 @@
  */
 /* ARGSUSED */
 static	int
-bpfclose(dev, flags, fmt, td)
-	struct cdev *dev;
-	int flags;
-	int fmt;
-	struct thread *td;
+bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct bpf_d *d = dev->si_drv1;
 
@@ -454,10 +474,7 @@
  *  bpfread - read next chunk of packets from buffers
  */
 static	int
-bpfread(dev, uio, ioflag)
-	struct cdev *dev;
-	struct uio *uio;
-	int ioflag;
+bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	struct bpf_d *d = dev->si_drv1;
 	int timed_out;
@@ -471,6 +488,7 @@
 		return (EINVAL);
 
 	BPFD_LOCK(d);
+	d->bd_pid = curthread->td_proc->p_pid;
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	timed_out = (d->bd_state == BPF_TIMED_OUT);
@@ -560,8 +578,7 @@
  * If there are processes sleeping on this descriptor, wake them up.
  */
 static __inline void
-bpf_wakeup(d)
-	struct bpf_d *d;
+bpf_wakeup(struct bpf_d *d)
 {
 
 	BPFD_LOCK_ASSERT(d);
@@ -578,8 +595,7 @@
 }
 
 static void
-bpf_timed_out(arg)
-	void *arg;
+bpf_timed_out(void *arg)
 {
 	struct bpf_d *d = (struct bpf_d *)arg;
 
@@ -592,18 +608,16 @@
 	BPFD_UNLOCK(d);
 }
 
-static	int
-bpfwrite(dev, uio, ioflag)
-	struct cdev *dev;
-	struct uio *uio;
-	int ioflag;
+static int
+bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	struct bpf_d *d = dev->si_drv1;
 	struct ifnet *ifp;
-	struct mbuf *m;
-	int error;
+	struct mbuf *m, *mc;
 	struct sockaddr dst;
+	int error, hlen;
 
+	d->bd_pid = curthread->td_proc->p_pid;
 	if (d->bd_bif == NULL)
 		return (ENXIO);
 
@@ -616,25 +630,47 @@
 		return (0);
 
 	bzero(&dst, sizeof(dst));
-	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu,
-	    &m, &dst, d->bd_wfilter);
+	m = NULL;
+	hlen = 0;
+	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
+	    &m, &dst, &hlen, d->bd_wfilter);
 	if (error)
 		return (error);
 
 	if (d->bd_hdrcmplt)
 		dst.sa_family = pseudo_AF_HDRCMPLT;
 
+	if (d->bd_feedback) {
+		mc = m_dup(m, M_DONTWAIT);
+		if (mc != NULL)
+			mc->m_pkthdr.rcvif = ifp;
+		/* XXX Do not return the same packet twice. */
+		if (d->bd_direction == BPF_D_INOUT)
+			m->m_flags |= M_SKIP_BPF;
+	} else
+		mc = NULL;
+
+	m->m_pkthdr.len -= hlen;
+	m->m_len -= hlen;
+	m->m_data += hlen;	/* XXX */
+
 #ifdef MAC
 	BPFD_LOCK(d);
 	mac_create_mbuf_from_bpfdesc(d, m);
+	if (mc != NULL)
+		mac_create_mbuf_from_bpfdesc(d, mc);
 	BPFD_UNLOCK(d);
 #endif
-	NET_LOCK_GIANT();
+
 	error = (*ifp->if_output)(ifp, m, &dst, NULL);
-	NET_UNLOCK_GIANT();
-	/*
-	 * The driver frees the mbuf.
-	 */
+
+	if (mc != NULL) {
+		if (error == 0)
+			(*ifp->if_input)(ifp, mc);
+		else
+			m_freem(mc);
+	}
+
 	return (error);
 }
 
@@ -643,8 +679,7 @@
  * receive and drop counts.
  */
 static void
-reset_d(d)
-	struct bpf_d *d;
+reset_d(struct bpf_d *d)
 {
 
 	mtx_assert(&d->bd_mtx, MA_OWNED);
@@ -678,18 +713,15 @@
  *  BIOCVERSION		Get filter language version.
  *  BIOCGHDRCMPLT	Get "header already complete" flag
  *  BIOCSHDRCMPLT	Set "header already complete" flag
- *  BIOCGSEESENT	Get "see packets sent" flag
- *  BIOCSSEESENT	Set "see packets sent" flag
+ *  BIOCGDIRECTION	Get packet direction flag
+ *  BIOCSDIRECTION	Set packet direction flag
  *  BIOCLOCK		Set "locked" flag
+ *  BIOCFEEDBACK	Set packet feedback mode.
  */
 /* ARGSUSED */
 static	int
-bpfioctl(dev, cmd, addr, flags, td)
-	struct cdev *dev;
-	u_long cmd;
-	caddr_t addr;
-	int flags;
-	struct thread *td;
+bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
+    struct thread *td)
 {
 	struct bpf_d *d = dev->si_drv1;
 	int error = 0;
@@ -697,8 +729,8 @@
 	/* 
 	 * Refresh PID associated with this descriptor.
 	 */
-	d->bd_pid = td->td_proc->p_pid;
 	BPFD_LOCK(d);
+	d->bd_pid = td->td_proc->p_pid;
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
@@ -716,6 +748,7 @@
 		case BIOCVERSION:
 		case BIOCGRSIG:
 		case BIOCGHDRCMPLT:
+		case BIOCFEEDBACK:
 		case FIONREAD:
 		case BIOCLOCK:
 		case BIOCSRTIMEOUT:
@@ -815,9 +848,7 @@
 			break;
 		}
 		if (d->bd_promisc == 0) {
-			mtx_lock(&Giant);
 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
-			mtx_unlock(&Giant);
 			if (error == 0)
 				d->bd_promisc = 1;
 		}
@@ -938,9 +969,6 @@
 		*(u_int *)addr = d->bd_hdrcmplt;
 		break;
 
-	case BIOCLOCK:
-		d->bd_locked = 1;
-		break;
 	/*
 	 * Set "header already complete" flag
 	 */
@@ -949,17 +977,38 @@
 		break;
 
 	/*
-	 * Get "see sent packets" flag
+	 * Get packet direction flag
 	 */
-	case BIOCGSEESENT:
-		*(u_int *)addr = d->bd_seesent;
+	case BIOCGDIRECTION:
+		*(u_int *)addr = d->bd_direction;
 		break;
 
 	/*
-	 * Set "see sent packets" flag
+	 * Set packet direction flag
 	 */
-	case BIOCSSEESENT:
-		d->bd_seesent = *(u_int *)addr;
+	case BIOCSDIRECTION:
+		{
+			u_int	direction;
+
+			direction = *(u_int *)addr;
+			switch (direction) {
+			case BPF_D_IN:
+			case BPF_D_INOUT:
+			case BPF_D_OUT:
+				d->bd_direction = direction;
+				break;
+			default:
+				error = EINVAL;
+			}
+		}
+		break;
+
+	case BIOCFEEDBACK:
+		d->bd_feedback = *(u_int *)addr;
+		break;
+
+	case BIOCLOCK:
+		d->bd_locked = 1;
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
@@ -1011,20 +1060,26 @@
  * free it and replace it.  Returns EINVAL for bogus requests.
  */
 static int
-bpf_setf(d, fp, cmd)
-	struct bpf_d *d;
-	struct bpf_program *fp;
-	u_long cmd;
+bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 {
 	struct bpf_insn *fcode, *old;
 	u_int wfilter, flen, size;
+#ifdef BPF_JITTER
+	bpf_jit_filter *ofunc;
+#endif
 
 	if (cmd == BIOCSETWF) {
 		old = d->bd_wfilter;
 		wfilter = 1;
+#ifdef BPF_JITTER
+		ofunc = NULL;
+#endif
 	} else {
 		wfilter = 0;
 		old = d->bd_rfilter;
+#ifdef BPF_JITTER
+		ofunc = d->bd_bfilter;
+#endif
 	}
 	if (fp->bf_insns == NULL) {
 		if (fp->bf_len != 0)
@@ -1032,12 +1087,20 @@
 		BPFD_LOCK(d);
 		if (wfilter)
 			d->bd_wfilter = NULL;
-		else
+		else {
 			d->bd_rfilter = NULL;
+#ifdef BPF_JITTER
+			d->bd_bfilter = NULL;
+#endif
+		}
 		reset_d(d);
 		BPFD_UNLOCK(d);
 		if (old != NULL)
 			free((caddr_t)old, M_BPF);
+#ifdef BPF_JITTER
+		if (ofunc != NULL)
+			bpf_destroy_jit_filter(ofunc);
+#endif
 		return (0);
 	}
 	flen = fp->bf_len;
@@ -1051,12 +1114,20 @@
 		BPFD_LOCK(d);
 		if (wfilter)
 			d->bd_wfilter = fcode;
-		else
+		else {
 			d->bd_rfilter = fcode;
+#ifdef BPF_JITTER
+			d->bd_bfilter = bpf_jitter(fcode, flen);
+#endif
+		}
 		reset_d(d);
 		BPFD_UNLOCK(d);
 		if (old != NULL)
 			free((caddr_t)old, M_BPF);
+#ifdef BPF_JITTER
+		if (ofunc != NULL)
+			bpf_destroy_jit_filter(ofunc);
+#endif
 
 		return (0);
 	}
@@ -1070,60 +1141,36 @@
  * Return an errno or 0.
  */
 static int
-bpf_setif(d, ifr)
-	struct bpf_d *d;
-	struct ifreq *ifr;
+bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 {
 	struct bpf_if *bp;
-	int error;
 	struct ifnet *theywant;
 
 	theywant = ifunit(ifr->ifr_name);
-	if (theywant == NULL)
-		return ENXIO;
+	if (theywant == NULL || theywant->if_bpf == NULL)
+		return (ENXIO);
 
+	bp = theywant->if_bpf;
 	/*
-	 * Look through attached interfaces for the named one.
-	 */
-	mtx_lock(&bpf_mtx);
-	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		struct ifnet *ifp = bp->bif_ifp;
-
-		if (ifp == NULL || ifp != theywant)
-			continue;
-		/* skip additional entry */
-		if (bp->bif_driverp != &ifp->if_bpf)
-			continue;
-
-		mtx_unlock(&bpf_mtx);
-		/*
-		 * We found the requested interface.
-		 * Allocate the packet buffers if we need to.
-		 * If we're already attached to requested interface,
-		 * just flush the buffer.
-		 */
-		if (d->bd_sbuf == NULL) {
-			error = bpf_allocbufs(d);
-			if (error != 0)
-				return (error);
-		}
-		if (bp != d->bd_bif) {
-			if (d->bd_bif)
-				/*
-				 * Detach if attached to something else.
-				 */
-				bpf_detachd(d);
+	 * Allocate the packet buffers if we need to.
+	 * If we're already attached to requested interface,
+	 * just flush the buffer.
+	 */
+	if (d->bd_sbuf == NULL)
+		bpf_allocbufs(d);
+	if (bp != d->bd_bif) {
+		if (d->bd_bif)
+			/*
+			 * Detach if attached to something else.
+			 */
+			bpf_detachd(d);
 
-			bpf_attachd(d, bp);
-		}
-		BPFD_LOCK(d);
-		reset_d(d);
-		BPFD_UNLOCK(d);
-		return (0);
+		bpf_attachd(d, bp);
 	}
-	mtx_unlock(&bpf_mtx);
-	/* Not found. */
-	return (ENXIO);
+	BPFD_LOCK(d);
+	reset_d(d);
+	BPFD_UNLOCK(d);
+	return (0);
 }
 
 /*
@@ -1133,10 +1180,7 @@
  * Otherwise, return false but make a note that a selwakeup() must be done.
  */
 static int
-bpfpoll(dev, events, td)
-	struct cdev *dev;
-	int events;
-	struct thread *td;
+bpfpoll(struct cdev *dev, int events, struct thread *td)
 {
 	struct bpf_d *d;
 	int revents;
@@ -1148,9 +1192,9 @@
 	/*
 	 * Refresh PID associated with this descriptor.
 	 */
-	d->bd_pid = td->td_proc->p_pid;
 	revents = events & (POLLOUT | POLLWRNORM);
 	BPFD_LOCK(d);
+	d->bd_pid = td->td_proc->p_pid;
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (bpf_ready(d))
 			revents |= events & (POLLIN | POLLRDNORM);
@@ -1173,9 +1217,7 @@
  * reject all others.
  */
 int
-bpfkqfilter(dev, kn)
-	struct cdev *dev;
-	struct knote *kn;
+bpfkqfilter(struct cdev *dev, struct knote *kn)
 {
 	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;
 
@@ -1185,17 +1227,18 @@
 	/* 
 	 * Refresh PID associated with this descriptor.
 	 */
+	BPFD_LOCK(d);
 	d->bd_pid = curthread->td_proc->p_pid;
 	kn->kn_fop = &bpfread_filtops;
 	kn->kn_hook = d;
-	knlist_add(&d->bd_sel.si_note, kn, 0);
+	knlist_add(&d->bd_sel.si_note, kn, 1);
+	BPFD_UNLOCK(d);
 
 	return (0);
 }
 
 static void
-filt_bpfdetach(kn)
-	struct knote *kn;
+filt_bpfdetach(struct knote *kn)
 {
 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
 
@@ -1203,9 +1246,7 @@
 }
 
 static int
-filt_bpfread(kn, hint)
-	struct knote *kn;
-	long hint;
+filt_bpfread(struct knote *kn, long hint)
 {
 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
 	int ready;
@@ -1233,32 +1274,34 @@
  * buffer.
  */
 void
-bpf_tap(bp, pkt, pktlen)
-	struct bpf_if *bp;
-	u_char *pkt;
-	u_int pktlen;
+bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 {
 	struct bpf_d *d;
 	u_int slen;
+	int gottime;
+	struct timeval tv;
 
-	/*
-	 * Lockless read to avoid cost of locking the interface if there are
-	 * no descriptors attached.
-	 */
-	if (LIST_EMPTY(&bp->bif_dlist))
-		return;
-
+	gottime = 0;
 	BPFIF_LOCK(bp);
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		BPFD_LOCK(d);
 		++d->bd_rcount;
+#ifdef BPF_JITTER
+		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
+			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
+		else
+#endif
 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 		if (slen != 0) {
 			d->bd_fcount++;
+			if (!gottime) {
+				microtime(&tv);
+				gottime = 1;
+			}
 #ifdef MAC
 			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
 #endif
-				catchpacket(d, pkt, pktlen, slen, bcopy);
+				catchpacket(d, pkt, pktlen, slen, bcopy, &tv);
 		}
 		BPFD_UNLOCK(d);
 	}
@@ -1270,10 +1313,7 @@
  * from m_copydata in sys/uipc_mbuf.c.
  */
 static void
-bpf_mcopy(src_arg, dst_arg, len)
-	const void *src_arg;
-	void *dst_arg;
-	size_t len;
+bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
 {
 	const struct mbuf *m;
 	u_int count;
@@ -1292,40 +1332,56 @@
 	}
 }
 
+#define	BPF_CHECK_DIRECTION(d, m) \
+	if (((d)->bd_direction == BPF_D_IN && (m)->m_pkthdr.rcvif == NULL) || \
+	    ((d)->bd_direction == BPF_D_OUT && (m)->m_pkthdr.rcvif != NULL))
+
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
  */
 void
-bpf_mtap(bp, m)
-	struct bpf_if *bp;
-	struct mbuf *m;
+bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 {
 	struct bpf_d *d;
 	u_int pktlen, slen;
+	int gottime;
+	struct timeval tv;
 
-	/*
-	 * Lockless read to avoid cost of locking the interface if there are
-	 * no descriptors attached.
-	 */
-	if (LIST_EMPTY(&bp->bif_dlist))
+	if (m->m_flags & M_SKIP_BPF) {
+		m->m_flags &= ~M_SKIP_BPF;
 		return;
+	}
+
+	gottime = 0;
 
 	pktlen = m_length(m, NULL);
 
 	BPFIF_LOCK(bp);
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
+		BPF_CHECK_DIRECTION(d, m)
 			continue;
 		BPFD_LOCK(d);
 		++d->bd_rcount;
+#ifdef BPF_JITTER
+		/* XXX We cannot handle multiple mbufs. */
+		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
+		    m->m_next == NULL)
+			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
+			    pktlen, pktlen);
+		else
+#endif
 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
 		if (slen != 0) {
 			d->bd_fcount++;
+			if (!gottime) {
+				microtime(&tv);
+				gottime = 1;
+			}
 #ifdef MAC
 			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
 #endif
 				catchpacket(d, (u_char *)m, pktlen, slen,
-				    bpf_mcopy);
+				    bpf_mcopy, &tv);
 		}
 		BPFD_UNLOCK(d);
 	}
@@ -1337,22 +1393,20 @@
  * an mbuf chain and to be prepended by a contiguous header.
  */
 void
-bpf_mtap2(bp, data, dlen, m)
-	struct bpf_if *bp;
-	void *data;
-	u_int dlen;
-	struct mbuf *m;
+bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 {
 	struct mbuf mb;
 	struct bpf_d *d;
 	u_int pktlen, slen;
+	int gottime;
+	struct timeval tv;
 
-	/*
-	 * Lockless read to avoid cost of locking the interface if there are
-	 * no descriptors attached.
-	 */
-	if (LIST_EMPTY(&bp->bif_dlist))
+	if (m->m_flags & M_SKIP_BPF) {
+		m->m_flags &= ~M_SKIP_BPF;
 		return;
+	}
+
+	gottime = 0;
 
 	pktlen = m_length(m, NULL);
 	/*
@@ -1367,24 +1421,30 @@
 
 	BPFIF_LOCK(bp);
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
+		BPF_CHECK_DIRECTION(d, m)
 			continue;
 		BPFD_LOCK(d);
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
 		if (slen != 0) {
 			d->bd_fcount++;
+			if (!gottime) {
+				microtime(&tv);
+				gottime = 1;
+			}
 #ifdef MAC
 			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
 #endif
 				catchpacket(d, (u_char *)&mb, pktlen, slen,
-				    bpf_mcopy);
+				    bpf_mcopy, &tv);
 		}
 		BPFD_UNLOCK(d);
 	}
 	BPFIF_UNLOCK(bp);
 }
 
+#undef	BPF_CHECK_DIRECTION
+
 /*
  * Move the packet data from interface memory (pkt) into the
  * store buffer.  "cpfn" is the routine called to do the actual data
@@ -1393,11 +1453,8 @@
  * pkt is really an mbuf.
  */
 static void
-catchpacket(d, pkt, pktlen, snaplen, cpfn)
-	struct bpf_d *d;
-	u_char *pkt;
-	u_int pktlen, snaplen;
-	void (*cpfn)(const void *, void *, size_t);
+catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
+    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
 {
 	struct bpf_hdr *hp;
 	int totlen, curlen;
@@ -1449,7 +1506,7 @@
 	 * Append the bpf header.
 	 */
 	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
-	microtime(&hp->bh_tstamp);
+	hp->bh_tstamp = *tv;
 	hp->bh_datalen = pktlen;
 	hp->bh_hdrlen = hdrlen;
 	/*
@@ -1465,22 +1522,18 @@
 /*
  * Initialize all nonzero fields of a descriptor.
  */
-static int
-bpf_allocbufs(d)
-	struct bpf_d *d;
+static void
+bpf_allocbufs(struct bpf_d *d)
 {
-	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-	if (d->bd_fbuf == NULL)
-		return (ENOBUFS);
 
+	KASSERT(d->bd_fbuf == NULL, ("bpf_allocbufs: bd_fbuf != NULL"));
+	KASSERT(d->bd_sbuf == NULL, ("bpf_allocbufs: bd_sbuf != NULL"));
+	KASSERT(d->bd_hbuf == NULL, ("bpf_allocbufs: bd_hbuf != NULL"));
+
+	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
 	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-	if (d->bd_sbuf == NULL) {
-		free(d->bd_fbuf, M_BPF);
-		return (ENOBUFS);
-	}
 	d->bd_slen = 0;
 	d->bd_hlen = 0;
-	return (0);
 }
 
 /*
@@ -1488,8 +1541,7 @@
  * Called on close.
  */
 static void
-bpf_freed(d)
-	struct bpf_d *d;
+bpf_freed(struct bpf_d *d)
 {
 	/*
 	 * We don't need to lock out interrupts since this descriptor has
@@ -1503,8 +1555,12 @@
 		if (d->bd_fbuf != NULL)
 			free(d->bd_fbuf, M_BPF);
 	}
-	if (d->bd_rfilter)
+	if (d->bd_rfilter) {
 		free((caddr_t)d->bd_rfilter, M_BPF);
+#ifdef BPF_JITTER
+		bpf_destroy_jit_filter(d->bd_bfilter);
+#endif
+	}
 	if (d->bd_wfilter)
 		free((caddr_t)d->bd_wfilter, M_BPF);
 	mtx_destroy(&d->bd_mtx);
@@ -1515,9 +1571,7 @@
  * fixed size of the link header (variable length headers not yet supported).
  */
 void
-bpfattach(ifp, dlt, hdrlen)
-	struct ifnet *ifp;
-	u_int dlt, hdrlen;
+bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
 {
 
 	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
@@ -1530,28 +1584,25 @@
  * headers are not yet supported).
  */
 void
-bpfattach2(ifp, dlt, hdrlen, driverp)
-	struct ifnet *ifp;
-	u_int dlt, hdrlen;
-	struct bpf_if **driverp;
+bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 {
 	struct bpf_if *bp;
-	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
+
+	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
 	if (bp == NULL)
 		panic("bpfattach");
 
 	LIST_INIT(&bp->bif_dlist);
-	bp->bif_driverp = driverp;
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
 	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
+	*driverp = bp;
 
 	mtx_lock(&bpf_mtx);
 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
 	mtx_unlock(&bpf_mtx);
 
-	*bp->bif_driverp = NULL;
-
 	/*
 	 * Compute the length of the bpf header.  This is not necessarily
 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
@@ -1571,8 +1622,7 @@
  * ENXIO.
  */
 void
-bpfdetach(ifp)
-	struct ifnet *ifp;
+bpfdetach(struct ifnet *ifp)
 {
 	struct bpf_if	*bp;
 	struct bpf_d	*d;
@@ -1609,9 +1659,7 @@
  * Get a list of available data link type of the interface.
  */
 static int
-bpf_getdltlist(d, bfl)
-	struct bpf_d *d;
-	struct bpf_dltlist *bfl;
+bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 {
 	int n, error;
 	struct ifnet *ifp;
@@ -1643,9 +1691,7 @@
  * Set the data link type of a BPF instance.
  */
 static int
-bpf_setdlt(d, dlt)
-	struct bpf_d *d;
-	u_int dlt;
+bpf_setdlt(struct bpf_d *d, u_int dlt)
 {
 	int error, opromisc;
 	struct ifnet *ifp;
@@ -1681,12 +1727,8 @@
 }
 
 static void
-bpf_clone(arg, cred, name, namelen, dev)
-	void *arg;
-	struct ucred *cred;
-	char *name;
-	int namelen;
-	struct cdev **dev;
+bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
+    struct cdev **dev)
 {
 	int u;
 
@@ -1702,8 +1744,7 @@
 }
 
 static void
-bpf_drvinit(unused)
-	void *unused;
+bpf_drvinit(void *unused)
 {
 
 	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
@@ -1720,7 +1761,8 @@
 	d->bd_immediate = bd->bd_immediate;
 	d->bd_promisc = bd->bd_promisc;
 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
-	d->bd_seesent = bd->bd_seesent;
+	d->bd_direction = bd->bd_direction;
+	d->bd_feedback = bd->bd_feedback;
 	d->bd_async = bd->bd_async;
 	d->bd_rcount = bd->bd_rcount;
 	d->bd_dcount = bd->bd_dcount;
@@ -1732,7 +1774,6 @@
 	d->bd_pid = bd->bd_pid;
 	strlcpy(d->bd_ifname,
 	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
-	strlcpy(d->bd_pcomm, bd->bd_pcomm, MAXCOMLEN);
 	d->bd_locked = bd->bd_locked;
 }
 
@@ -1750,7 +1791,7 @@
 	 * if the users who opened the devices were able to retrieve
 	 * the statistics for them, too.
 	 */
-	error = suser(req->td);
+	error = priv_check(req->td, PRIV_NET_BPF);
 	if (error)
 		return (error);
 	if (req->oldptr == NULL)
@@ -1766,12 +1807,14 @@
 	}
 	index = 0;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+		BPFIF_LOCK(bp);
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			xbd = &xbdbuf[index++];
 			BPFD_LOCK(bd);
 			bpfstats_fill_xbpf(xbd, bd);
 			BPFD_UNLOCK(bd);
 		}
+		BPFIF_UNLOCK(bp);
 	}
 	mtx_unlock(&bpf_mtx);
 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
@@ -1788,66 +1831,50 @@
  * A 'better' implementation would allow the core bpf functionality
  * to be loaded at runtime.
  */
+static struct bpf_if bp_null;
 
 void
-bpf_tap(bp, pkt, pktlen)
-	struct bpf_if *bp;
-	u_char *pkt;
-	u_int pktlen;
+bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 {
 }
 
 void
-bpf_mtap(bp, m)
-	struct bpf_if *bp;
-	struct mbuf *m;
+bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 {
 }
 
 void
-bpf_mtap2(bp, d, l, m)
-	struct bpf_if *bp;
-	void *d;
-	u_int l;
-	struct mbuf *m;
+bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
 {
 }
 
 void
-bpfattach(ifp, dlt, hdrlen)
-	struct ifnet *ifp;
-	u_int dlt, hdrlen;
+bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
 {
+
+	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
 }
 
 void
-bpfattach2(ifp, dlt, hdrlen, driverp)
-	struct ifnet *ifp;
-	u_int dlt, hdrlen;
-	struct bpf_if **driverp;
+bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 {
+
+	*driverp = &bp_null;
 }
 
 void
-bpfdetach(ifp)
-	struct ifnet *ifp;
+bpfdetach(struct ifnet *ifp)
 {
 }
 
 u_int
-bpf_filter(pc, p, wirelen, buflen)
-	const struct bpf_insn *pc;
-	u_char *p;
-	u_int wirelen;
-	u_int buflen;
+bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 {
 	return -1;	/* "no filter" behaviour */
 }
 
 int
-bpf_validate(f, len)
-	const struct bpf_insn *f;
-	int len;
+bpf_validate(const struct bpf_insn *f, int len)
 {
 	return 0;		/* false */
 }
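
Note on the bpf.c ioctl changes above: the BIOCGSEESENT/BIOCSSEESENT pair is replaced by BIOCGDIRECTION/BIOCSDIRECTION (taking BPF_D_IN, BPF_D_OUT or BPF_D_INOUT), and BIOCFEEDBACK enables feeding written packets back through if_input. A hedged userland sketch of selecting inbound-only capture with the new ioctl; the interface name is an example and error handling is trimmed:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	u_int dir = BPF_D_IN;			/* capture inbound packets only */
	int fd = open("/dev/bpf", O_RDONLY);	/* cloned bpf device */

	if (fd < 0) {
		perror("open");
		return (1);
	}
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));	/* example NIC */
	if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||
	    ioctl(fd, BIOCSDIRECTION, &dir) == -1) {
		perror("ioctl");
		close(fd);
		return (1);
	}
	printf("inbound-only capture armed on %s\n", ifr.ifr_name);
	close(fd);
	return (0);
}
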
Index: if_var.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_var.h -L sys/net/if_var.h -u -r1.1.1.1 -r1.2
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	From: @(#)if.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_var.h,v 1.98.2.5 2005/10/07 14:00:05 glebius Exp $
+ * $FreeBSD: src/sys/net/if_var.h,v 1.115.2.1 2007/12/07 05:46:08 kmacy Exp $
  */
 
 #ifndef	_NET_IF_VAR_H_
@@ -69,6 +69,7 @@
 struct	socket;
 struct	ether_header;
 struct	carp_if;
+struct  ifvlantrunk;
 #endif
 
 #include <sys/queue.h>		/* get TAILQ macros */
@@ -90,6 +91,7 @@
 TAILQ_HEAD(ifaddrhead, ifaddr);	/* instantiation is preserved in the list */
 TAILQ_HEAD(ifprefixhead, ifprefix);
 TAILQ_HEAD(ifmultihead, ifmultiaddr);
+TAILQ_HEAD(ifgrouphead, ifg_group);
 
 /*
  * Structure defining a queue for a network interface.
@@ -125,7 +127,7 @@
 		 * addresses which store the link-level address and the name
 		 * of the interface.
 		 * However, access to the AF_LINK address through this
-		 * field is deprecated. Use ifaddr_byindex() instead.
+		 * field is deprecated. Use if_addr or ifaddr_byindex() instead.
 		 */
 	struct	knlist if_klist;	/* events attached to this if */
 	int	if_pcount;		/* number of promiscuous listeners */
@@ -133,10 +135,10 @@
 	struct	bpf_if *if_bpf;		/* packet filter structure */
 	u_short	if_index;		/* numeric abbreviation for this if  */
 	short	if_timer;		/* time 'til if_watchdog called */
-	u_short	if_nvlans;		/* number of active vlans */
+	struct  ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
 	int	if_flags;		/* up/down, broadcast, etc. */
-	int	if_capabilities;	/* interface capabilities */
-	int	if_capenable;		/* enabled features */
+	int	if_capabilities;	/* interface features & capabilities */
+	int	if_capenable;		/* enabled features & capabilities */
 	void	*if_linkmib;		/* link-type-specific MIB data */
 	size_t	if_linkmiblen;		/* length of above data */
 	struct	if_data if_data;
@@ -158,9 +160,8 @@
 		(void *);
 	int	(*if_resolvemulti)	/* validate/resolve multicast */
 		(struct ifnet *, struct sockaddr **, struct sockaddr *);
-	void	*if_spare1;		/* spare pointer 1 */
-	void	*if_spare2;		/* spare pointer 2 */
-	void	*if_spare3;		/* spare pointer 3 */
+	struct	ifaddr	*if_addr;	/* pointer to link-level address */
+	void	*if_llsoftc;		/* link layer softc */
 	int	if_drv_flags;		/* driver-managed status flags */
 	u_int	if_spare_flags2;	/* spare flags 2 */
 	struct  ifaltq if_snd;		/* output queue (includes altq) */
@@ -180,6 +181,13 @@
 	struct	task if_starttask;	/* task for IFF_NEEDSGIANT */
 	struct	task if_linktask;	/* task for link change events */
 	struct	mtx if_addr_mtx;	/* mutex to protect address lists */
+	LIST_ENTRY(ifnet) if_clones;	/* interfaces of a cloner */
+	TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
+					/* protected by if_addr_mtx */
+	void	*if_pf_kif;
+	void	*if_lagg;		/* lagg glue */
+	void	*if_pspare[10];		/* multiq/TOE 3; vimage 3; general use 4 */
+	int	if_ispare[2];		/* general use 2 */
 };
 
 typedef void if_init_f_t(void *);
@@ -209,13 +217,12 @@
 #define	if_iqdrops	if_data.ifi_iqdrops
 #define	if_noproto	if_data.ifi_noproto
 #define	if_lastchange	if_data.ifi_lastchange
-#define if_recvquota	if_data.ifi_recvquota
-#define	if_xmitquota	if_data.ifi_xmitquota
 #define if_rawoutput(if, m, sa) if_output(if, m, sa, (struct rtentry *)NULL)
 
 /* for compatibility with other BSDs */
 #define	if_addrlist	if_addrhead
 #define	if_list		if_link
+#define	if_name(ifp)	((ifp)->if_xname)
 
 /*
  * Locks for address lists on the network interface.
@@ -316,6 +323,37 @@
 typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
 EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
 
+/*
+ * interface groups
+ */
+struct ifg_group {
+	char				 ifg_group[IFNAMSIZ];
+	u_int				 ifg_refcnt;
+	void				*ifg_pf_kif;
+	TAILQ_HEAD(, ifg_member)	 ifg_members;
+	TAILQ_ENTRY(ifg_group)		 ifg_next;
+};
+
+struct ifg_member {
+	TAILQ_ENTRY(ifg_member)	 ifgm_next;
+	struct ifnet		*ifgm_ifp;
+};
+
+struct ifg_list {
+	struct ifg_group	*ifgl_group;
+	TAILQ_ENTRY(ifg_list)	 ifgl_next;
+};
+
+/* group attach event */
+typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
+EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
+/* group detach event */
+typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
+EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
+/* group change event */
+typedef void (*group_change_event_handler_t)(void *, const char *);
+EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
+
 #define	IF_AFDATA_LOCK_INIT(ifp)	\
     mtx_init(&(ifp)->if_afdata_mtx, "if_afdata", NULL, MTX_DEF)
 #define	IF_AFDATA_LOCK(ifp)	mtx_lock(&(ifp)->if_afdata_mtx)
@@ -562,8 +600,6 @@
 /*
  * Multicast address structure.  This is analogous to the ifaddr
  * structure except that it keeps track of multicast addresses.
- * Also, the reference count here is a count of requests for this
- * address, not a count of pointers to this structure.
  */
 struct ifmultiaddr {
 	TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
@@ -572,6 +608,7 @@
 	struct	ifnet *ifma_ifp;	/* back-pointer to interface */
 	u_int	ifma_refcount;		/* reference count */
 	void	*ifma_protospec;	/* protocol-specific state, if any */
+	struct	ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
 };
 
 #ifdef _KERNEL
@@ -604,7 +641,6 @@
 
 struct ifindex_entry {
 	struct	ifnet *ife_ifnet;
-	struct	ifaddr *ife_ifnet_addr;
 	struct cdev *ife_dev;
 };
 
@@ -614,7 +650,7 @@
  * link-level ifaddr for the interface. You are not supposed to use
  * it to traverse the list of addresses associated to the interface.
  */
-#define ifaddr_byindex(idx)	ifindex_table[(idx)].ife_ifnet_addr
+#define ifaddr_byindex(idx)	ifnet_byindex(idx)->if_addr
 #define ifdev_byindex(idx)	ifindex_table[(idx)].ife_dev
 
 extern	struct ifnethead ifnet;
@@ -623,14 +659,19 @@
 extern	struct ifnet *loif;	/* first loopback interface */
 extern	int if_index;
 
+int	if_addgroup(struct ifnet *, const char *);
+int	if_delgroup(struct ifnet *, const char *);
 int	if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **);
 int	if_allmulti(struct ifnet *, int);
 struct	ifnet* if_alloc(u_char);
 void	if_attach(struct ifnet *);
 int	if_delmulti(struct ifnet *, struct sockaddr *);
+void	if_delmulti_ifma(struct ifmultiaddr *);
 void	if_detach(struct ifnet *);
 void	if_purgeaddrs(struct ifnet *);
 void	if_down(struct ifnet *);
+struct ifmultiaddr *
+	if_findmulti(struct ifnet *, struct sockaddr *);
 void	if_free(struct ifnet *);
 void	if_free_type(struct ifnet *, u_char);
 void	if_initname(struct ifnet *, const char *, int);
@@ -644,6 +685,7 @@
 struct	ifnet *ifunit(const char *);
 
 struct	ifaddr *ifa_ifwithaddr(struct sockaddr *);
+struct	ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
 struct	ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
 struct	ifaddr *ifa_ifwithnet(struct sockaddr *);
 struct	ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
@@ -657,7 +699,7 @@
 void	if_deregister_com_alloc(u_char type);
 
 #define IF_LLADDR(ifp)							\
-    LLADDR((struct sockaddr_dl *) ifaddr_byindex((ifp)->if_index)->ifa_addr)
+    LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
 
 #ifdef DEVICE_POLLING
 enum poll_cmd {	POLL_ONLY, POLL_AND_CHECK_STATUS };
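
Note on the if_var.h additions above: the new interface-group structures come with attach/detach/change event declarations. A minimal kernel-side sketch of hooking group_attach_event; the module scaffolding and example_* names are illustrative, only the EVENTHANDLER glue is the point:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

static eventhandler_tag example_grp_tag;	/* illustrative name */

static void
example_group_attached(void *arg, struct ifg_group *ifg)
{
	/* Runs whenever a new interface group is created. */
	printf("interface group created: %s (refcnt %u)\n",
	    ifg->ifg_group, ifg->ifg_refcnt);
}

static void
example_grp_init(void *dummy)
{
	example_grp_tag = EVENTHANDLER_REGISTER(group_attach_event,
	    example_group_attached, NULL, EVENTHANDLER_PRI_ANY);
}
SYSINIT(example_grp, SI_SUB_PSEUDO, SI_ORDER_ANY, example_grp_init, NULL);
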
--- /dev/null
+++ sys/net/if_enc.c
@@ -0,0 +1,310 @@
+/*-
+ * Copyright (c) 2006 The FreeBSD Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/net/if_enc.c,v 1.6.4.1 2007/12/29 17:28:38 thompsa Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/pfil.h>
+#include <net/route.h>
+#include <net/netisr.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_var.h>
+#include "opt_inet6.h"
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#include <netipsec/ipsec.h>
+
+#define ENCMTU		(1024+512)
+
+/* XXX this define must have the same value as in OpenBSD */
+#define M_CONF		0x0400	/* payload was encrypted (ESP-transport) */
+#define M_AUTH		0x0800	/* payload was authenticated (AH or ESP auth) */
+#define M_AUTH_AH	0x2000	/* header was authenticated (AH) */
+
+struct enchdr {
+	u_int32_t af;
+	u_int32_t spi;
+	u_int32_t flags;
+};
+
+static struct ifnet	*encif;
+static struct mtx	enc_mtx;
+
+struct enc_softc {
+	struct	ifnet *sc_ifp;
+};
+
+static int	enc_ioctl(struct ifnet *, u_long, caddr_t);
+static int	enc_output(struct ifnet *ifp, struct mbuf *m,
+		    struct sockaddr *dst, struct rtentry *rt);
+static int	enc_clone_create(struct if_clone *, int, caddr_t);
+static void	enc_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(enc, 1);
+
+static void
+enc_clone_destroy(struct ifnet *ifp)
+{
+	KASSERT(ifp != encif, ("%s: destroying encif", __func__));
+
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+}
+
+static int
+enc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct ifnet *ifp;
+	struct enc_softc *sc;
+
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+	ifp = sc->sc_ifp = if_alloc(IFT_ENC);
+	if (ifp == NULL) {
+		free(sc, M_DEVBUF);
+		return (ENOSPC);
+	}
+
+	if_initname(ifp, ifc->ifc_name, unit);
+	ifp->if_mtu = ENCMTU;
+	ifp->if_ioctl = enc_ioctl;
+	ifp->if_output = enc_output;
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	ifp->if_softc = sc;
+	if_attach(ifp);
+	bpfattach(ifp, DLT_ENC, sizeof(struct enchdr));
+
+	mtx_lock(&enc_mtx);
+	/* grab a pointer to enc0, ignore the rest */
+	if (encif == NULL)
+		encif = ifp;
+	mtx_unlock(&enc_mtx);
+
+	return (0);
+}
+
+static int
+enc_modevent(module_t mod, int type, void *data)
+{
+	switch (type) {
+	case MOD_LOAD:
+		mtx_init(&enc_mtx, "enc mtx", NULL, MTX_DEF);
+		if_clone_attach(&enc_cloner);
+		break;
+	case MOD_UNLOAD:
+		printf("enc module unload - not possible for this module\n");
+		return (EINVAL);
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t enc_mod = {
+	"enc",
+	enc_modevent,
+	0
+};
+
+DECLARE_MODULE(enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+
+static int
+enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+	m_freem(m);
+	return (0);
+}
+
+/*
+ * Process an ioctl request.
+ */
+/* ARGSUSED */
+static int
+enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	int error = 0;
+
+	mtx_lock(&enc_mtx);
+
+	switch (cmd) {
+
+	case SIOCSIFFLAGS:
+		if (ifp->if_flags & IFF_UP)
+			ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		else
+			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+		break;
+
+	default:
+		error = EINVAL;
+	}
+
+	mtx_unlock(&enc_mtx);
+	return (error);
+}
+
+int
+ipsec_filter(struct mbuf **mp, int dir)
+{
+	int error, i;
+	struct ip *ip;
+
+	KASSERT(encif != NULL, ("%s: encif is null", __func__));
+
+	if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (0);
+
+	/* Skip pfil(9) if no filters are loaded */
+	if (!(PFIL_HOOKED(&inet_pfil_hook)
+#ifdef INET6
+	    || PFIL_HOOKED(&inet6_pfil_hook)
+#endif
+	    )) {
+		return (0);
+	}
+
+	i = min((*mp)->m_pkthdr.len, max_protohdr);
+	if ((*mp)->m_len < i) {
+		*mp = m_pullup(*mp, i);
+		if (*mp == NULL) {
+			printf("%s: m_pullup failed\n", __func__);
+			return (-1);
+		}
+	}
+
+	error = 0;
+	ip = mtod(*mp, struct ip *);
+	switch (ip->ip_v) {
+		case 4:
+			/*
+			 * before calling the firewall, swap fields the same as
+			 * IP does. here we assume the header is contiguous
+			 */
+			ip->ip_len = ntohs(ip->ip_len);
+			ip->ip_off = ntohs(ip->ip_off);
+
+			error = pfil_run_hooks(&inet_pfil_hook, mp,
+			    encif, dir, NULL);
+
+			if (*mp == NULL || error != 0)
+				break;
+
+			/* restore byte ordering */
+			ip = mtod(*mp, struct ip *);
+			ip->ip_len = htons(ip->ip_len);
+			ip->ip_off = htons(ip->ip_off);
+			break;
+
+#ifdef INET6
+		case 6:
+			error = pfil_run_hooks(&inet6_pfil_hook, mp,
+			    encif, dir, NULL);
+			break;
+#endif
+		default:
+			printf("%s: unknown IP version\n", __func__);
+	}
+
+	/*
+	 * If the mbuf was consumed by the filter for requeueing (dummynet, etc)
+	 * then error will be zero but we still want to return an error to our
+	 * caller so the null mbuf isn't forwarded further.
+	 */
+	if (*mp == NULL && error == 0)
+		return (-1);	/* Consumed by the filter */
+	if (*mp == NULL)
+		return (error);
+	if (error != 0)
+		goto bad;
+
+	return (error);
+
+bad:
+	m_freem(*mp);
+	*mp = NULL;
+	return (error);
+}
+
+void
+ipsec_bpf(struct mbuf *m, struct secasvar *sav, int af)
+{
+	int flags;
+	struct enchdr hdr;
+
+	KASSERT(encif != NULL, ("%s: encif is null", __func__));
+	KASSERT(sav != NULL, ("%s: sav is null", __func__));
+
+	if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return;
+
+	if (bpf_peers_present(encif->if_bpf)) {
+		flags = 0;
+		if (sav->alg_enc != SADB_EALG_NONE)
+			flags |= M_CONF;
+		if (sav->alg_auth != SADB_AALG_NONE)
+			flags |= M_AUTH;
+
+		/*
+		 * We need to prepend the address family as a four byte
+		 * field.  Cons up a dummy header to pacify bpf.  This
+		 * is safe because bpf will only read from the mbuf
+		 * (i.e., it won't try to free it or keep a pointer
+		 * to it).
+		 */
+		hdr.af = af;
+		hdr.spi = sav->spi;
+		hdr.flags = flags;
+
+		bpf_mtap2(encif->if_bpf, &hdr, sizeof(hdr), m);
+	}
+}
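
Note on if_enc.c above: packets seen on enc0 are handed to bpf via bpf_mtap2() with the struct enchdr (af, spi, flags) prepended, using DLT_ENC. A hedged userland sketch of peeling that header off a captured record; the struct and flag values are mirrored locally for illustration, and the bpf read loop itself is omitted:

#include <sys/types.h>
#include <sys/socket.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local mirror of struct enchdr from if_enc.c, for illustration only. */
struct enc_hdr {
	uint32_t af;
	uint32_t spi;
	uint32_t flags;
};

#define ENC_M_CONF	0x0400	/* payload was encrypted (ESP-transport) */
#define ENC_M_AUTH	0x0800	/* payload was authenticated (AH or ESP auth) */

static const unsigned char *
decode_enc_record(const unsigned char *pkt, size_t len)
{
	struct enc_hdr hdr;

	if (len < sizeof(hdr))
		return (NULL);
	memcpy(&hdr, pkt, sizeof(hdr));		/* avoid alignment games */
	printf("af=%u spi=0x%08x%s%s\n", (unsigned)hdr.af, (unsigned)hdr.spi,
	    (hdr.flags & ENC_M_CONF) ? " encrypted" : "",
	    (hdr.flags & ENC_M_AUTH) ? " authenticated" : "");
	/* The inner IP packet (v4 or v6, keyed on af) follows the header. */
	return (pkt + sizeof(hdr));
}

int
main(void)
{
	unsigned char rec[sizeof(struct enc_hdr) + 20];
	struct enc_hdr hdr = { AF_INET, 0x1000, ENC_M_CONF | ENC_M_AUTH };

	memset(rec, 0, sizeof(rec));
	memcpy(rec, &hdr, sizeof(hdr));
	decode_enc_record(rec, sizeof(rec));
	return (0);
}
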
Index: if_ef.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_ef.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_ef.c -L sys/net/if_ef.c -u -r1.1.1.2 -r1.2
--- sys/net/if_ef.c
+++ sys/net/if_ef.c
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_ef.c,v 1.34.2.3 2006/02/14 21:11:19 rwatson Exp $
+ * $FreeBSD: src/sys/net/if_ef.c,v 1.39 2006/01/18 14:24:39 andre Exp $
  */
 
 #include "opt_inet.h"
@@ -126,8 +126,6 @@
 ef_attach(struct efnet *sc)
 {
 	struct ifnet *ifp = sc->ef_ifp;
-	struct ifaddr *ifa2;
-	struct sockaddr_dl *sdl2;
 
 	ifp->if_start = ef_start;
 	ifp->if_watchdog = NULL;
@@ -137,9 +135,7 @@
 	/*
 	 * Attach the interface
 	 */
-	ifa2 = ifaddr_byindex(sc->ef_pifp->if_index);
-	sdl2 = (struct sockaddr_dl *)ifa2->ifa_addr;
-	ether_ifattach(ifp, LLADDR(sdl2));
+	ether_ifattach(ifp, IF_LLADDR(sc->ef_pifp));
 
 	ifp->if_resolvemulti = 0;
 	ifp->if_type = IFT_XETHER;
@@ -251,7 +247,7 @@
 #endif
 #ifdef INET
 	case ETHERTYPE_IP:
-		if (ip_fastforward(m))
+		if ((m = ip_fastforward(m)) == NULL)
 			return (0);
 		isr = NETISR_IP;
 		break;
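
Note on the if_ef.c hunk above: ip_fastforward() now returns the (possibly replaced) mbuf, or NULL once the packet has been consumed, instead of a boolean. A self-contained sketch of that consume-or-continue calling pattern, with a stub standing in for the real routine; the stub, types and names are illustrative only:

#include <stdio.h>
#include <stddef.h>

struct mbuf { int m_len; };		/* trivial stand-in for this sketch */

/* Stub with the new-style contract: NULL means "consumed, stop here". */
static struct mbuf *
fastforward_stub(struct mbuf *m)
{
	return (m->m_len > 1500 ? NULL : m);	/* arbitrary demo condition */
}

static int
ether_demux_sketch(struct mbuf *m)
{
	/* New calling convention: reassign m and bail out if consumed. */
	if ((m = fastforward_stub(m)) == NULL)
		return (0);
	/* ...continue local processing with the (possibly new) mbuf... */
	printf("local delivery, %d bytes\n", m->m_len);
	return (1);
}

int
main(void)
{
	struct mbuf small = { 64 }, jumbo = { 9000 };

	ether_demux_sketch(&small);	/* continues */
	ether_demux_sketch(&jumbo);	/* consumed by the stub */
	return (0);
}
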
Index: bpf_filter.c
===================================================================
RCS file: /home/cvs/src/sys/net/bpf_filter.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/bpf_filter.c -L sys/net/bpf_filter.c -u -r1.1.1.1 -r1.2
--- sys/net/bpf_filter.c
+++ sys/net/bpf_filter.c
@@ -33,7 +33,7 @@
  *
  *      @(#)bpf_filter.c	8.1 (Berkeley) 6/10/93
  *
- * $FreeBSD: src/sys/net/bpf_filter.c,v 1.23 2005/01/07 01:45:34 imp Exp $
+ * $FreeBSD: src/sys/net/bpf_filter.c,v 1.28 2007/09/13 09:00:32 dwmalone Exp $
  */
 
 #include <sys/param.h>
@@ -186,7 +186,7 @@
 {
 	register u_int32_t A = 0, X = 0;
 	register bpf_u_int32 k;
-	int32_t mem[BPF_MEMWORDS];
+	u_int32_t mem[BPF_MEMWORDS];
 
 	if (pc == 0)
 		/*
@@ -334,7 +334,7 @@
 					return 0;
 				m = (struct mbuf *)p;
 				MINDEX(m, k);
-				A = mtod(m, char *)[k];
+				A = mtod(m, u_char *)[k];
 				continue;
 #else
 				return 0;
@@ -353,7 +353,7 @@
 					return 0;
 				m = (struct mbuf *)p;
 				MINDEX(m, k);
-				X = (mtod(m, char *)[k] & 0xf) << 2;
+				X = (mtod(m, u_char *)[k] & 0xf) << 2;
 				continue;
 #else
 				return 0;
@@ -520,6 +520,14 @@
 	register int i;
 	register const struct bpf_insn *p;
 
+	/* Do not accept negative length filter. */
+	if (len < 0)
+		return 0;
+
+	/* An empty filter means accept all. */
+	if (len == 0)
+		return 1;
+
 	for (i = 0; i < len; ++i) {
 		/*
 		 * Check that that jumps are forward, and within
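
Note on the bpf_validate() change above: negative program lengths are rejected and a zero-length program now validates as accept-all. For comparison, the explicit one-instruction accept-all program built with the BPF_STMT macro from <net/bpf.h> (userland sketch):

#include <sys/types.h>
#include <net/bpf.h>
#include <stdio.h>

int
main(void)
{
	/* Explicit accept-all: unconditionally return the maximum snap length. */
	struct bpf_insn accept_all[] = {
		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
	};
	struct bpf_program prog;

	prog.bf_len = sizeof(accept_all) / sizeof(accept_all[0]);
	prog.bf_insns = accept_all;

	/*
	 * With the change above, a bpf_program with bf_len == 0 and no
	 * instructions validates as accept-all too, so an empty program
	 * is an equivalent way to accept everything.
	 */
	printf("program length: %u instruction(s)\n", prog.bf_len);
	return (0);
}
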
Index: if_types.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_types.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_types.h -L sys/net/if_types.h -u -r1.1.1.1 -r1.2
--- sys/net/if_types.h
+++ sys/net/if_types.h
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_types.h	8.3 (Berkeley) 4/28/95
- * $FreeBSD: src/sys/net/if_types.h,v 1.21 2005/06/10 16:49:19 brooks Exp $
+ * $FreeBSD: src/sys/net/if_types.h,v 1.23 2007/06/13 14:01:43 rwatson Exp $
  * $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $
  */
 
@@ -246,8 +246,9 @@
 #define	IFT_GIF		0xf0
 #define	IFT_PVC		0xf1
 #define	IFT_FAITH	0xf2
+#define	IFT_ENC		0xf4
 #define	IFT_PFLOG	0xf6
 #define	IFT_PFSYNC	0xf7
 #define	IFT_CARP	0xf8	/* Common Address Redundancy Protocol */
-#define IFT_IPXIP	0xf9
+#define IFT_IPXIP	0xf9	/* IPX over IP tunneling; no longer used. */
 #endif /* !_NET_IF_TYPES_H_ */
--- /dev/null
+++ sys/net/ieee8023ad_lacp.c
@@ -0,0 +1,1964 @@
+/*	$NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $	*/
+
+/*-
+ * Copyright (c)2005 YAMAMOTO Takashi,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/net/ieee8023ad_lacp.c,v 1.12 2007/07/05 09:18:57 thompsa Exp $");
+
+#include <sys/param.h>
+#include <sys/callout.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h> /* hz */
+#include <sys/socket.h> /* for net/if.h */
+#include <sys/sockio.h>
+#include <machine/stdarg.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/taskqueue.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/ethernet.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+
+#include <net/if_lagg.h>
+#include <net/ieee8023ad_lacp.h>
+
+/*
+ * actor system priority and port priority.
+ * XXX should be configurable.
+ */
+
+#define	LACP_SYSTEM_PRIO	0x8000
+#define	LACP_PORT_PRIO		0x8000
+
+const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
+    { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
+
+static const struct tlv_template lacp_info_tlv_template[] = {
+	{ LACP_TYPE_ACTORINFO,
+	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
+	{ LACP_TYPE_PARTNERINFO,
+	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
+	{ LACP_TYPE_COLLECTORINFO,
+	    sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
+	{ 0, 0 },
+};
+
+typedef void (*lacp_timer_func_t)(struct lacp_port *);
+
+static const struct tlv_template marker_info_tlv_template[] = {
+	{ MARKER_TYPE_INFO,
+	    sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
+	{ 0, 0 },
+};
+
+static const struct tlv_template marker_response_tlv_template[] = {
+	{ MARKER_TYPE_RESPONSE,
+	    sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
+	{ 0, 0 },
+};
+
+static void	lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
+static void	lacp_fill_markerinfo(struct lacp_port *,
+		    struct lacp_markerinfo *);
+
+static uint64_t	lacp_aggregator_bandwidth(struct lacp_aggregator *);
+static void	lacp_suppress_distributing(struct lacp_softc *,
+		    struct lacp_aggregator *);
+static void	lacp_transit_expire(void *);
+static void	lacp_select_active_aggregator(struct lacp_softc *);
+static uint16_t	lacp_compose_key(struct lacp_port *);
+static int	tlv_check(const void *, size_t, const struct tlvhdr *,
+		    const struct tlv_template *, boolean_t);
+static void	lacp_tick(void *);
+
+static void	lacp_fill_aggregator_id(struct lacp_aggregator *,
+		    const struct lacp_port *);
+static void	lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
+		    const struct lacp_peerinfo *);
+static int	lacp_aggregator_is_compatible(const struct lacp_aggregator *,
+		    const struct lacp_port *);
+static int	lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
+		    const struct lacp_peerinfo *);
+
+static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
+		    struct lacp_port *);
+static void	lacp_aggregator_addref(struct lacp_softc *,
+		    struct lacp_aggregator *);
+static void	lacp_aggregator_delref(struct lacp_softc *,
+		    struct lacp_aggregator *);
+
+/* receive machine */
+
+static void	lacp_dequeue(void *, int);
+static int	lacp_pdu_input(struct lagg_port *, struct mbuf *);
+static int	lacp_marker_input(struct lagg_port *, struct mbuf *);
+static void	lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
+static void	lacp_sm_rx_timer(struct lacp_port *);
+static void	lacp_sm_rx_set_expired(struct lacp_port *);
+static void	lacp_sm_rx_update_ntt(struct lacp_port *,
+		    const struct lacpdu *);
+static void	lacp_sm_rx_record_pdu(struct lacp_port *,
+		    const struct lacpdu *);
+static void	lacp_sm_rx_update_selected(struct lacp_port *,
+		    const struct lacpdu *);
+static void	lacp_sm_rx_record_default(struct lacp_port *);
+static void	lacp_sm_rx_update_default_selected(struct lacp_port *);
+static void	lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
+		    const struct lacp_peerinfo *);
+
+/* mux machine */
+
+static void	lacp_sm_mux(struct lacp_port *);
+static void	lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
+static void	lacp_sm_mux_timer(struct lacp_port *);
+
+/* periodic transmit machine */
+
+static void	lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
+static void	lacp_sm_ptx_tx_schedule(struct lacp_port *);
+static void	lacp_sm_ptx_timer(struct lacp_port *);
+
+/* transmit machine */
+
+static void	lacp_sm_tx(struct lacp_port *);
+static void	lacp_sm_assert_ntt(struct lacp_port *);
+
+static void	lacp_run_timers(struct lacp_port *);
+static int	lacp_compare_peerinfo(const struct lacp_peerinfo *,
+		    const struct lacp_peerinfo *);
+static int	lacp_compare_systemid(const struct lacp_systemid *,
+		    const struct lacp_systemid *);
+static void	lacp_port_enable(struct lacp_port *);
+static void	lacp_port_disable(struct lacp_port *);
+static void	lacp_select(struct lacp_port *);
+static void	lacp_unselect(struct lacp_port *);
+static void	lacp_disable_collecting(struct lacp_port *);
+static void	lacp_enable_collecting(struct lacp_port *);
+static void	lacp_disable_distributing(struct lacp_port *);
+static void	lacp_enable_distributing(struct lacp_port *);
+static int	lacp_xmit_lacpdu(struct lacp_port *);
+static int	lacp_xmit_marker(struct lacp_port *);
+
+#if defined(LACP_DEBUG)
+static void	lacp_dump_lacpdu(const struct lacpdu *);
+static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
+		    size_t);
+static const char *lacp_format_lagid(const struct lacp_peerinfo *,
+		    const struct lacp_peerinfo *, char *, size_t);
+static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
+		    char *, size_t);
+static const char *lacp_format_state(uint8_t, char *, size_t);
+static const char *lacp_format_mac(const uint8_t *, char *, size_t);
+static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
+		    size_t);
+static const char *lacp_format_portid(const struct lacp_portid *, char *,
+		    size_t);
+static void	lacp_dprintf(const struct lacp_port *, const char *, ...)
+		    __attribute__((__format__(__printf__, 2, 3)));
+#define	LACP_DPRINTF(a)	lacp_dprintf a
+#else
+#define LACP_DPRINTF(a) /* nothing */
+#endif
+
+/*
+ * partner administration variables.
+ * XXX should be configurable.
+ */
+
+static const struct lacp_peerinfo lacp_partner_admin = {
+	.lip_systemid = { .lsi_prio = 0xffff },
+	.lip_portid = { .lpi_prio = 0xffff },
+#if 1
+	/* optimistic */
+	.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
+	    LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
+#else
+	/* pessimistic */
+	.lip_state = 0,
+#endif
+};
+
+static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
+	[LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
+	[LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
+	[LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
+};
+
+void
+lacp_input(struct lagg_port *lgp, struct mbuf *m)
+{
+	struct lagg_softc *sc = lgp->lp_softc;
+	struct lacp_softc *lsc = LACP_SOFTC(sc);
+	uint8_t subtype;
+
+	if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) {
+		m_freem(m);
+		return;
+	}
+
+	m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype);
+	switch (subtype) {
+		case SLOWPROTOCOLS_SUBTYPE_LACP:
+			IF_HANDOFF(&lsc->lsc_queue, m, NULL);
+			taskqueue_enqueue(taskqueue_swi, &lsc->lsc_qtask);
+			break;
+
+		case SLOWPROTOCOLS_SUBTYPE_MARKER:
+			lacp_marker_input(lgp, m);
+			break;
+
+		default:
+			/* Unknown LACP packet type */
+			m_freem(m);
+			break;
+	}
+}
+
+static void
+lacp_dequeue(void *arg, int pending)
+{
+	struct lacp_softc *lsc = (struct lacp_softc *)arg;
+	struct lagg_softc *sc = lsc->lsc_softc;
+	struct lagg_port *lgp;
+	struct mbuf *m;
+
+	LAGG_WLOCK(sc);
+	for (;;) {
+		IF_DEQUEUE(&lsc->lsc_queue, m);
+		if (m == NULL)
+			break;
+		lgp = m->m_pkthdr.rcvif->if_lagg;
+		lacp_pdu_input(lgp, m);
+	}
+	LAGG_WUNLOCK(sc);
+}
+
+/*
+ * lacp_pdu_input: process lacpdu
+ */
+static int
+lacp_pdu_input(struct lagg_port *lgp, struct mbuf *m)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+	struct lacpdu *du;
+	int error = 0;
+
+	LAGG_WLOCK_ASSERT(lgp->lp_softc);
+
+	if (m->m_pkthdr.len != sizeof(*du)) {
+		goto bad;
+	}
+
+	if ((m->m_flags & M_MCAST) == 0) {
+		goto bad;
+	}
+
+	if (m->m_len < sizeof(*du)) {
+		m = m_pullup(m, sizeof(*du));
+		if (m == NULL) {
+			return (ENOMEM);
+		}
+	}
+
+	du = mtod(m, struct lacpdu *);
+
+	if (memcmp(&du->ldu_eh.ether_dhost,
+	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
+		goto bad;
+	}
+
+	/*
+	 * Ignore the version field for compatibility with
+	 * future protocol revisions.
+	 */
+#if 0
+	if (du->ldu_sph.sph_version != 1) {
+		goto bad;
+	}
+#endif
+
+	/*
+	 * Ignore the TLV types for compatibility with
+	 * future protocol revisions.
+	 */
+	if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
+	    lacp_info_tlv_template, FALSE)) {
+		goto bad;
+	}
+
+#if defined(LACP_DEBUG)
+	LACP_DPRINTF((lp, "lacpdu receive\n"));
+	lacp_dump_lacpdu(du);
+#endif /* defined(LACP_DEBUG) */
+	lacp_sm_rx(lp, du);
+
+	m_freem(m);
+
+	return (error);
+
+bad:
+	m_freem(m);
+	return (EINVAL);
+}
+
+static void
+lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+	struct lagg_softc *sc = lgp->lp_softc;
+
+	info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
+	memcpy(&info->lip_systemid.lsi_mac,
+	    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+	info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
+	info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
+	info->lip_state = lp->lp_state;
+}
+
+static void
+lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info)
+{
+	struct ifnet *ifp = lp->lp_ifp;
+
+	/* Fill in the port index and system id (encoded as the MAC) */
+	info->mi_rq_port = htons(ifp->if_index);
+	memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN);
+	info->mi_rq_xid = htonl(0);
+}
+
+static int
+lacp_xmit_lacpdu(struct lacp_port *lp)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+	struct mbuf *m;
+	struct lacpdu *du;
+	int error;
+
+	LAGG_WLOCK_ASSERT(lgp->lp_softc);
+
+	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	if (m == NULL) {
+		return (ENOMEM);
+	}
+	m->m_len = m->m_pkthdr.len = sizeof(*du);
+
+	du = mtod(m, struct lacpdu *);
+	memset(du, 0, sizeof(*du));
+
+	memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
+	    ETHER_ADDR_LEN);
+	memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
+	du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
+
+	du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
+	du->ldu_sph.sph_version = 1;
+
+	TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
+	du->ldu_actor = lp->lp_actor;
+
+	TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
+	    sizeof(du->ldu_partner));
+	du->ldu_partner = lp->lp_partner;
+
+	TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
+	    sizeof(du->ldu_collector));
+	du->ldu_collector.lci_maxdelay = 0;
+
+#if defined(LACP_DEBUG)
+	LACP_DPRINTF((lp, "lacpdu transmit\n"));
+	lacp_dump_lacpdu(du);
+#endif /* defined(LACP_DEBUG) */
+
+	m->m_flags |= M_MCAST;
+
+	/*
+	 * XXX should use higher priority queue.
+	 * otherwise network congestion can break aggregation.
+	 */
+
+	error = lagg_enqueue(lp->lp_ifp, m);
+	return (error);
+}
+
+static int
+lacp_xmit_marker(struct lacp_port *lp)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+	struct mbuf *m;
+	struct markerdu *mdu;
+	int error;
+
+	LAGG_WLOCK_ASSERT(lgp->lp_softc);
+
+	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	if (m == NULL) {
+		return (ENOMEM);
+	}
+	m->m_len = m->m_pkthdr.len = sizeof(*mdu);
+
+	mdu = mtod(m, struct markerdu *);
+	memset(mdu, 0, sizeof(*mdu));
+
+	memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
+	    ETHER_ADDR_LEN);
+	memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
+	mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW);
+
+	mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER;
+	mdu->mdu_sph.sph_version = 1;
+
+	/* Bump the transaction id and copy over the marker info */
+	lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1);
+	TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info));
+	mdu->mdu_info = lp->lp_marker;
+
+	LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n",
+	    ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":",
+	    ntohl(mdu->mdu_info.mi_rq_xid)));
+
+	m->m_flags |= M_MCAST;
+	error = lagg_enqueue(lp->lp_ifp, m);
+	return (error);
+}
+void
+lacp_linkstate(struct lagg_port *lgp)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+	struct ifnet *ifp = lgp->lp_ifp;
+	struct ifmediareq ifmr;
+	int error = 0;
+	u_int media;
+	uint8_t old_state;
+	uint16_t old_key;
+
+	LAGG_WLOCK_ASSERT(lgp->lp_softc);
+
+	bzero((char *)&ifmr, sizeof(ifmr));
+	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
+	if (error != 0)
+		return;
+
+	media = ifmr.ifm_active;
+	LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
+	    "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
+	    (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
+	old_state = lp->lp_state;
+	old_key = lp->lp_key;
+
+	lp->lp_media = media;
+	/*
+	 * If the port is not an active full duplex Ethernet link then it can
+	 * not be aggregated.
+	 */
+	if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 ||
+	    ifp->if_link_state != LINK_STATE_UP) {
+		lacp_port_disable(lp);
+	} else {
+		lacp_port_enable(lp);
+	}
+	lp->lp_key = lacp_compose_key(lp);
+
+	if (old_state != lp->lp_state || old_key != lp->lp_key) {
+		LACP_DPRINTF((lp, "-> UNSELECTED\n"));
+		lp->lp_selected = LACP_UNSELECTED;
+	}
+}
+
+static void
+lacp_tick(void *arg)
+{
+	struct lacp_softc *lsc = arg;
+	struct lagg_softc *sc = lsc->lsc_softc;
+	struct lacp_port *lp;
+
+	LAGG_WLOCK(sc);
+	LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
+		if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
+			continue;
+
+		lacp_run_timers(lp);
+
+		lacp_select(lp);
+		lacp_sm_mux(lp);
+		lacp_sm_tx(lp);
+		lacp_sm_ptx_tx_schedule(lp);
+	}
+	LAGG_WUNLOCK(sc);
+	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
+}
+
+int
+lacp_port_create(struct lagg_port *lgp)
+{
+	struct lagg_softc *sc = lgp->lp_softc;
+	struct lacp_softc *lsc = LACP_SOFTC(sc);
+	struct lacp_port *lp;
+	struct ifnet *ifp = lgp->lp_ifp;
+	struct sockaddr_dl sdl;
+	struct ifmultiaddr *rifma = NULL;
+	int error;
+
+	boolean_t active = TRUE; /* XXX should be configurable */
+	boolean_t fast = FALSE; /* XXX should be configurable */
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	bzero((char *)&sdl, sizeof(sdl));
+	sdl.sdl_len = sizeof(sdl);
+	sdl.sdl_family = AF_LINK;
+	sdl.sdl_index = ifp->if_index;
+	sdl.sdl_type = IFT_ETHER;
+	sdl.sdl_alen = ETHER_ADDR_LEN;
+
+	bcopy(&ethermulticastaddr_slowprotocols,
+	    LLADDR(&sdl), ETHER_ADDR_LEN);
+	error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
+	if (error) {
+		printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
+		return (error);
+	}
+
+	lp = malloc(sizeof(struct lacp_port),
+	    M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (lp == NULL)
+		return (ENOMEM);
+
+	lgp->lp_psc = (caddr_t)lp;
+	lp->lp_ifp = ifp;
+	lp->lp_lagg = lgp;
+	lp->lp_lsc = lsc;
+	lp->lp_ifma = rifma;
+
+	LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
+
+	lacp_fill_actorinfo(lp, &lp->lp_actor);
+	lacp_fill_markerinfo(lp, &lp->lp_marker);
+	lp->lp_state =
+	    (active ? LACP_STATE_ACTIVITY : 0) |
+	    (fast ? LACP_STATE_TIMEOUT : 0);
+	lp->lp_aggregator = NULL;
+	lacp_linkstate(lgp);
+	lacp_sm_rx_set_expired(lp);
+
+	return (0);
+}
+
+void
+lacp_port_destroy(struct lagg_port *lgp)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+	int i;
+
+	LAGG_WLOCK_ASSERT(lgp->lp_softc);
+
+	for (i = 0; i < LACP_NTIMER; i++) {
+		LACP_TIMER_DISARM(lp, i);
+	}
+
+	lacp_disable_collecting(lp);
+	lacp_disable_distributing(lp);
+	lacp_unselect(lp);
+	lgp->lp_flags &= ~LAGG_PORT_DISABLED;
+
+	/* The address may have already been removed by if_purgemaddrs() */
+	if (!lgp->lp_detaching)
+		if_delmulti_ifma(lp->lp_ifma);
+
+	LIST_REMOVE(lp, lp_next);
+	free(lp, M_DEVBUF);
+}
+
+int
+lacp_port_isactive(struct lagg_port *lgp)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lacp_aggregator *la = lp->lp_aggregator;
+
+	/* This port is joined to the active aggregator */
+	if (la != NULL && la == lsc->lsc_active_aggregator)
+		return (1);
+
+	return (0);
+}
+
+void
+lacp_req(struct lagg_softc *sc, caddr_t data)
+{
+	struct lacp_opreq *req = (struct lacp_opreq *)data;
+	struct lacp_softc *lsc = LACP_SOFTC(sc);
+	struct lacp_aggregator *la = lsc->lsc_active_aggregator;
+
+	bzero(req, sizeof(struct lacp_opreq));
+	if (la != NULL) {
+		req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio);
+		memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac,
+		    ETHER_ADDR_LEN);
+		req->actor_key = ntohs(la->la_actor.lip_key);
+		req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio);
+		req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno);
+		req->actor_state = la->la_actor.lip_state;
+
+		req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio);
+		memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac,
+		    ETHER_ADDR_LEN);
+		req->partner_key = ntohs(la->la_partner.lip_key);
+		req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio);
+		req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno);
+		req->partner_state = la->la_partner.lip_state;
+	}
+}
+
+void
+lacp_portreq(struct lagg_port *lgp, caddr_t data)
+{
+	struct lacp_opreq *req = (struct lacp_opreq *)data;
+	struct lacp_port *lp = LACP_PORT(lgp);
+
+	req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio);
+	memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac,
+	    ETHER_ADDR_LEN);
+	req->actor_key = ntohs(lp->lp_actor.lip_key);
+	req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio);
+	req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno);
+	req->actor_state = lp->lp_actor.lip_state;
+
+	req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio);
+	memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac,
+	    ETHER_ADDR_LEN);
+	req->partner_key = ntohs(lp->lp_partner.lip_key);
+	req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio);
+	req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno);
+	req->partner_state = lp->lp_partner.lip_state;
+}
+
+static void
+lacp_disable_collecting(struct lacp_port *lp)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+
+	LACP_DPRINTF((lp, "collecting disabled\n"));
+
+	lp->lp_state &= ~LACP_STATE_COLLECTING;
+	lgp->lp_flags &= ~LAGG_PORT_COLLECTING;
+}
+
+static void
+lacp_enable_collecting(struct lacp_port *lp)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+
+	LACP_DPRINTF((lp, "collecting enabled\n"));
+
+	lp->lp_state |= LACP_STATE_COLLECTING;
+	lgp->lp_flags |= LAGG_PORT_COLLECTING;
+}
+
+static void
+lacp_disable_distributing(struct lacp_port *lp)
+{
+	struct lacp_aggregator *la = lp->lp_aggregator;
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lagg_port *lgp = lp->lp_lagg;
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+	LAGG_WLOCK_ASSERT(lgp->lp_softc);
+
+	if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
+		return;
+	}
+
+	KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
+	KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
+	KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
+
+	LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
+	    "nports %d -> %d\n",
+	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+	    la->la_nports, la->la_nports - 1));
+
+	TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
+	la->la_nports--;
+
+	lacp_suppress_distributing(lsc, la);
+
+	lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
+	lgp->lp_flags &= ~LAGG_PORT_DISTRIBUTING;
+
+	if (lsc->lsc_active_aggregator == la) {
+		lacp_select_active_aggregator(lsc);
+	}
+}
+
+static void
+lacp_enable_distributing(struct lacp_port *lp)
+{
+	struct lacp_aggregator *la = lp->lp_aggregator;
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lagg_port *lgp = lp->lp_lagg;
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+	LAGG_WLOCK_ASSERT(lgp->lp_softc);
+
+	if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
+		return;
+	}
+
+	LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
+	    "nports %d -> %d\n",
+	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+	    la->la_nports, la->la_nports + 1));
+
+	KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
+	TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
+	la->la_nports++;
+
+	lacp_suppress_distributing(lsc, la);
+
+	lp->lp_state |= LACP_STATE_DISTRIBUTING;
+	lgp->lp_flags |= LAGG_PORT_DISTRIBUTING;
+
+	if (lsc->lsc_active_aggregator != la) {
+		lacp_select_active_aggregator(lsc);
+	}
+}
+
+static void
+lacp_transit_expire(void *vp)
+{
+	struct lacp_softc *lsc = vp;
+
+	LACP_DPRINTF((NULL, "%s\n", __func__));
+	lsc->lsc_suppress_distributing = FALSE;
+}
+
+int
+lacp_attach(struct lagg_softc *sc)
+{
+	struct lacp_softc *lsc;
+
+	LAGG_WLOCK_ASSERT(sc);
+
+	lsc = malloc(sizeof(struct lacp_softc),
+	    M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (lsc == NULL)
+		return (ENOMEM);
+
+	sc->sc_psc = (caddr_t)lsc;
+	lsc->lsc_softc = sc;
+
+	lsc->lsc_hashkey = arc4random();
+	lsc->lsc_active_aggregator = NULL;
+	TAILQ_INIT(&lsc->lsc_aggregators);
+	LIST_INIT(&lsc->lsc_ports);
+
+	TASK_INIT(&lsc->lsc_qtask, 0, lacp_dequeue, lsc);
+	mtx_init(&lsc->lsc_queue.ifq_mtx, "lacp queue", NULL, MTX_DEF);
+	lsc->lsc_queue.ifq_maxlen = ifqmaxlen;
+
+	callout_init(&lsc->lsc_transit_callout, CALLOUT_MPSAFE);
+	callout_init(&lsc->lsc_callout, CALLOUT_MPSAFE);
+
+	/* if the lagg is already up then do the same */
+	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
+		lacp_init(sc);
+
+	return (0);
+}
+
+int
+lacp_detach(struct lagg_softc *sc)
+{
+	struct lacp_softc *lsc = LACP_SOFTC(sc);
+
+	KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
+	    ("aggregators still active"));
+	KASSERT(lsc->lsc_active_aggregator == NULL,
+	    ("aggregator still attached"));
+
+	sc->sc_psc = NULL;
+	callout_drain(&lsc->lsc_transit_callout);
+	callout_drain(&lsc->lsc_callout);
+	taskqueue_drain(taskqueue_swi, &lsc->lsc_qtask);
+	IF_DRAIN(&lsc->lsc_queue);
+	mtx_destroy(&lsc->lsc_queue.ifq_mtx);
+
+	free(lsc, M_DEVBUF);
+	return (0);
+}
+
+void
+lacp_init(struct lagg_softc *sc)
+{
+	struct lacp_softc *lsc = LACP_SOFTC(sc);
+
+	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
+}
+
+void
+lacp_stop(struct lagg_softc *sc)
+{
+	struct lacp_softc *lsc = LACP_SOFTC(sc);
+
+	callout_stop(&lsc->lsc_transit_callout);
+	callout_stop(&lsc->lsc_callout);
+}
+
+struct lagg_port *
+lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
+{
+	struct lacp_softc *lsc = LACP_SOFTC(sc);
+	struct lacp_aggregator *la;
+	struct lacp_port *lp;
+	uint32_t hash;
+	int nports;
+
+	LAGG_RLOCK_ASSERT(sc);
+
+	if (__predict_false(lsc->lsc_suppress_distributing)) {
+		LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
+		return (NULL);
+	}
+
+	la = lsc->lsc_active_aggregator;
+	if (__predict_false(la == NULL)) {
+		LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
+		return (NULL);
+	}
+
+	nports = la->la_nports;
+	KASSERT(nports > 0, ("no ports available"));
+
+	hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
+	hash %= nports;
+	lp = TAILQ_FIRST(&la->la_ports);
+	while (hash--) {
+		lp = TAILQ_NEXT(lp, lp_dist_q);
+	}
+
+	KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
+	    ("aggregated port is not distributing"));
+
+	return (lp->lp_lagg);
+}
+/*
+ * lacp_suppress_distributing: drop transmit packets for a while
+ * to preserve packet ordering.
+ */
+
+static void
+lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+	struct lacp_port *lp;
+
+	if (lsc->lsc_active_aggregator != la) {
+		return;
+	}
+
+	LACP_DPRINTF((NULL, "%s\n", __func__));
+	lsc->lsc_suppress_distributing = TRUE;
+
+	/* send a marker frame down each port to verify the queues are empty */
+	LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
+		lp->lp_flags |= LACP_PORT_MARK;
+		lacp_xmit_marker(lp);
+	}
+
+	/* set a timeout for the marker frames */
+	callout_reset(&lsc->lsc_transit_callout,
+	    LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
+}
+
+static int
+lacp_compare_peerinfo(const struct lacp_peerinfo *a,
+    const struct lacp_peerinfo *b)
+{
+	return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
+}
+
+static int
+lacp_compare_systemid(const struct lacp_systemid *a,
+    const struct lacp_systemid *b)
+{
+	return (memcmp(a, b, sizeof(*a)));
+}
+
+#if 0	/* unused */
+static int
+lacp_compare_portid(const struct lacp_portid *a,
+    const struct lacp_portid *b)
+{
+	return (memcmp(a, b, sizeof(*a)));
+}
+#endif
+
+static uint64_t
+lacp_aggregator_bandwidth(struct lacp_aggregator *la)
+{
+	struct lacp_port *lp;
+	uint64_t speed;
+
+	lp = TAILQ_FIRST(&la->la_ports);
+	if (lp == NULL) {
+		return (0);
+	}
+
+	speed = ifmedia_baudrate(lp->lp_media);
+	speed *= la->la_nports;
+	if (speed == 0) {
+		LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
+		    lp->lp_media, la->la_nports));
+	}
+
+	return (speed);
+}
+
+/*
+ * lacp_select_active_aggregator: select an aggregator to be used to transmit
+ * packets from the lagg(4) interface.
+ */
+
+static void
+lacp_select_active_aggregator(struct lacp_softc *lsc)
+{
+	struct lacp_aggregator *la;
+	struct lacp_aggregator *best_la = NULL;
+	uint64_t best_speed = 0;
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+	LACP_DPRINTF((NULL, "%s:\n", __func__));
+
+	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
+		uint64_t speed;
+
+		if (la->la_nports == 0) {
+			continue;
+		}
+
+		speed = lacp_aggregator_bandwidth(la);
+		LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
+		    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+		    speed, la->la_nports));
+
+		/*
+		 * This aggregator is chosen if:
+		 *  - the partner has a better system priority, or
+		 *  - the total aggregated speed is higher, or
+		 *  - it is already the chosen aggregator.
+		 */
+		if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
+		     LACP_SYS_PRI(best_la->la_partner)) ||
+		    speed > best_speed ||
+		    (speed == best_speed &&
+		    la == lsc->lsc_active_aggregator)) {
+			best_la = la;
+			best_speed = speed;
+		}
+	}
+
+	KASSERT(best_la == NULL || best_la->la_nports > 0,
+	    ("invalid aggregator refcnt"));
+	KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
+	    ("invalid aggregator list"));
+
+#if defined(LACP_DEBUG)
+	if (lsc->lsc_active_aggregator != best_la) {
+		LACP_DPRINTF((NULL, "active aggregator changed\n"));
+		LACP_DPRINTF((NULL, "old %s\n",
+		    lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
+		    buf, sizeof(buf))));
+	} else {
+		LACP_DPRINTF((NULL, "active aggregator not changed\n"));
+	}
+	LACP_DPRINTF((NULL, "new %s\n",
+	    lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
+#endif /* defined(LACP_DEBUG) */
+
+	if (lsc->lsc_active_aggregator != best_la) {
+		lsc->lsc_active_aggregator = best_la;
+		if (best_la) {
+			lacp_suppress_distributing(lsc, best_la);
+		}
+	}
+}
+
+static uint16_t
+lacp_compose_key(struct lacp_port *lp)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+	struct lagg_softc *sc = lgp->lp_softc;
+	u_int media = lp->lp_media;
+	uint16_t key;
+
+	if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
+
+		/*
+		 * non-aggregatable links should have unique keys.
+		 *
+		 * XXX this isn't really unique as if_index is 16 bit.
+		 */
+
+		/* bit 0..14:	(some bits of) if_index of this port */
+		key = lp->lp_ifp->if_index;
+		/* bit 15:	1 */
+		key |= 0x8000;
+	} else {
+		u_int subtype = IFM_SUBTYPE(media);
+
+		KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
+		KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
+
+		/* bit 0..4:	IFM_SUBTYPE */
+		key = subtype;
+		/* bit 5..14:	(some bits of) if_index of lagg device */
+		key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
+		/* bit 15:	0 */
+	}
+	return (htons(key));
+}
+
+static void
+lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
+	    __func__,
+	    lacp_format_lagid(&la->la_actor, &la->la_partner,
+	    buf, sizeof(buf)),
+	    la->la_refcnt, la->la_refcnt + 1));
+
+	KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
+	la->la_refcnt++;
+	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
+}
+
+static void
+lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
+	    __func__,
+	    lacp_format_lagid(&la->la_actor, &la->la_partner,
+	    buf, sizeof(buf)),
+	    la->la_refcnt, la->la_refcnt - 1));
+
+	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
+	la->la_refcnt--;
+	if (la->la_refcnt > 0) {
+		return;
+	}
+
+	KASSERT(la->la_refcnt == 0, ("refcount not zero"));
+	KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
+
+	TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
+
+	free(la, M_DEVBUF);
+}
+
+/*
+ * lacp_aggregator_get: allocate an aggregator.
+ */
+
+static struct lacp_aggregator *
+lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
+{
+	struct lacp_aggregator *la;
+
+	la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
+	if (la) {
+		la->la_refcnt = 1;
+		la->la_nports = 0;
+		TAILQ_INIT(&la->la_ports);
+		la->la_pending = 0;
+		TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
+	}
+
+	return (la);
+}
+
+/*
+ * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
+ */
+
+static void
+lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
+{
+	lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
+	lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
+
+	la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
+}
+
+static void
+lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
+    const struct lacp_peerinfo *lpi_port)
+{
+	memset(lpi_aggr, 0, sizeof(*lpi_aggr));
+	lpi_aggr->lip_systemid = lpi_port->lip_systemid;
+	lpi_aggr->lip_key = lpi_port->lip_key;
+}
+
+/*
+ * lacp_aggregator_is_compatible: check whether a port can join an aggregator.
+ */
+
+static int
+lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
+    const struct lacp_port *lp)
+{
+	if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
+	    !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
+		return (0);
+	}
+
+	if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) {
+		return (0);
+	}
+
+	if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) {
+		return (0);
+	}
+
+	if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) {
+		return (0);
+	}
+
+	return (1);
+}
+
+static int
+lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
+    const struct lacp_peerinfo *b)
+{
+	if (memcmp(&a->lip_systemid, &b->lip_systemid,
+	    sizeof(a->lip_systemid))) {
+		return (0);
+	}
+
+	if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) {
+		return (0);
+	}
+
+	return (1);
+}
+
+static void
+lacp_port_enable(struct lacp_port *lp)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+
+	lp->lp_state |= LACP_STATE_AGGREGATION;
+	lgp->lp_flags &= ~LAGG_PORT_DISABLED;
+}
+
+static void
+lacp_port_disable(struct lacp_port *lp)
+{
+	struct lagg_port *lgp = lp->lp_lagg;
+
+	lacp_set_mux(lp, LACP_MUX_DETACHED);
+
+	lp->lp_state &= ~LACP_STATE_AGGREGATION;
+	lp->lp_selected = LACP_UNSELECTED;
+	lacp_sm_rx_record_default(lp);
+	lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
+	lp->lp_state &= ~LACP_STATE_EXPIRED;
+	lgp->lp_flags |= LAGG_PORT_DISABLED;
+}
+
+/*
+ * lacp_select: select an aggregator.  create one if necessary.
+ */
+static void
+lacp_select(struct lacp_port *lp)
+{
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lacp_aggregator *la;
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+	if (lp->lp_aggregator) {
+		return;
+	}
+
+	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+	    ("timer_wait_while still active"));
+
+	LACP_DPRINTF((lp, "port lagid=%s\n",
+	    lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
+	    buf, sizeof(buf))));
+
+	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
+		if (lacp_aggregator_is_compatible(la, lp)) {
+			break;
+		}
+	}
+
+	if (la == NULL) {
+		la = lacp_aggregator_get(lsc, lp);
+		if (la == NULL) {
+			LACP_DPRINTF((lp, "aggregator creation failed\n"));
+
+			/*
+			 * will retry on the next tick.
+			 */
+
+			return;
+		}
+		lacp_fill_aggregator_id(la, lp);
+		LACP_DPRINTF((lp, "aggregator created\n"));
+	} else {
+		LACP_DPRINTF((lp, "compatible aggregator found\n"));
+		lacp_aggregator_addref(lsc, la);
+	}
+
+	LACP_DPRINTF((lp, "aggregator lagid=%s\n",
+	    lacp_format_lagid(&la->la_actor, &la->la_partner,
+	    buf, sizeof(buf))));
+
+	lp->lp_aggregator = la;
+	lp->lp_selected = LACP_SELECTED;
+}
+
+/*
+ * lacp_unselect: finish unselect/detach process.
+ */
+
+static void
+lacp_unselect(struct lacp_port *lp)
+{
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct lacp_aggregator *la = lp->lp_aggregator;
+
+	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+	    ("timer_wait_while still active"));
+
+	if (la == NULL) {
+		return;
+	}
+
+	lp->lp_aggregator = NULL;
+	lacp_aggregator_delref(lsc, la);
+}
+
+/* mux machine */
+
+static void
+lacp_sm_mux(struct lacp_port *lp)
+{
+	enum lacp_mux_state new_state;
+	boolean_t p_sync =
+		    (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
+	boolean_t p_collecting =
+	    (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
+	enum lacp_selected selected = lp->lp_selected;
+	struct lacp_aggregator *la;
+
+	/* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
+
+re_eval:
+	la = lp->lp_aggregator;
+	KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
+	    ("MUX not detached"));
+	new_state = lp->lp_mux_state;
+	switch (lp->lp_mux_state) {
+	case LACP_MUX_DETACHED:
+		if (selected != LACP_UNSELECTED) {
+			new_state = LACP_MUX_WAITING;
+		}
+		break;
+	case LACP_MUX_WAITING:
+		KASSERT(la->la_pending > 0 ||
+		    !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+		    ("timer_wait_while still active"));
+		if (selected == LACP_SELECTED && la->la_pending == 0) {
+			new_state = LACP_MUX_ATTACHED;
+		} else if (selected == LACP_UNSELECTED) {
+			new_state = LACP_MUX_DETACHED;
+		}
+		break;
+	case LACP_MUX_ATTACHED:
+		if (selected == LACP_SELECTED && p_sync) {
+			new_state = LACP_MUX_COLLECTING;
+		} else if (selected != LACP_SELECTED) {
+			new_state = LACP_MUX_DETACHED;
+		}
+		break;
+	case LACP_MUX_COLLECTING:
+		if (selected == LACP_SELECTED && p_sync && p_collecting) {
+			new_state = LACP_MUX_DISTRIBUTING;
+		} else if (selected != LACP_SELECTED || !p_sync) {
+			new_state = LACP_MUX_ATTACHED;
+		}
+		break;
+	case LACP_MUX_DISTRIBUTING:
+		if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
+			new_state = LACP_MUX_COLLECTING;
+		}
+		break;
+	default:
+		panic("%s: unknown state", __func__);
+	}
+
+	if (lp->lp_mux_state == new_state) {
+		return;
+	}
+
+	lacp_set_mux(lp, new_state);
+	goto re_eval;
+}
+
+static void
+lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
+{
+	struct lacp_aggregator *la = lp->lp_aggregator;
+
+	if (lp->lp_mux_state == new_state) {
+		return;
+	}
+
+	switch (new_state) {
+	case LACP_MUX_DETACHED:
+		lp->lp_state &= ~LACP_STATE_SYNC;
+		lacp_disable_distributing(lp);
+		lacp_disable_collecting(lp);
+		lacp_sm_assert_ntt(lp);
+		/* cancel timer */
+		if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
+			KASSERT(la->la_pending > 0,
+			    ("timer_wait_while not active"));
+			la->la_pending--;
+		}
+		LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
+		lacp_unselect(lp);
+		break;
+	case LACP_MUX_WAITING:
+		LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
+		    LACP_AGGREGATE_WAIT_TIME);
+		la->la_pending++;
+		break;
+	case LACP_MUX_ATTACHED:
+		lp->lp_state |= LACP_STATE_SYNC;
+		lacp_disable_collecting(lp);
+		lacp_sm_assert_ntt(lp);
+		break;
+	case LACP_MUX_COLLECTING:
+		lacp_enable_collecting(lp);
+		lacp_disable_distributing(lp);
+		lacp_sm_assert_ntt(lp);
+		break;
+	case LACP_MUX_DISTRIBUTING:
+		lacp_enable_distributing(lp);
+		break;
+	default:
+		panic("%s: unknown state", __func__);
+	}
+
+	LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
+
+	lp->lp_mux_state = new_state;
+}
+
+	 * XXX should use a higher-priority queue;
+	 * otherwise network congestion can break aggregation.
+{
+	struct lacp_aggregator *la = lp->lp_aggregator;
+#if defined(LACP_DEBUG)
+	char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+	KASSERT(la->la_pending > 0, ("no pending event"));
+
+	LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
+	    lacp_format_lagid(&la->la_actor, &la->la_partner,
+	    buf, sizeof(buf)),
+	    la->la_pending, la->la_pending - 1));
+
+	la->la_pending--;
+}
+
+/* periodic transmit machine */
+
+static void
+lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
+{
+	if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
+	    LACP_STATE_TIMEOUT)) {
+		return;
+	}
+
+	LACP_DPRINTF((lp, "partner timeout changed\n"));
+
+	/*
+	 * FAST_PERIODIC -> SLOW_PERIODIC
+	 * or
+	 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
+	 *
+	 * Let lacp_sm_ptx_tx_schedule() update the timeout.
+	 */
+
+	LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
+
+	/*
+	 * If the timeout has been shortened, assert NTT.
+	 */
+
+	if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
+		lacp_sm_assert_ntt(lp);
+	}
+}
+
+static void
+lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
+{
+	int timeout;
+
+	if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
+	    !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
+
+		/*
+		 * NO_PERIODIC
+		 */
+
+		LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
+		return;
+	}
+
+	if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
+		return;
+	}
+
+	timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
+	    LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
+
+	LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
+}
+
+static void
+lacp_sm_ptx_timer(struct lacp_port *lp)
+{
+	lacp_sm_assert_ntt(lp);
+}
+
+static void
+lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
+{
+	int timeout;
+
+	/*
+	 * check LACP_DISABLED first
+	 */
+
+	if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
+		return;
+	}
+
+	/*
+	 * check loopback condition.
+	 */
+
+	if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
+	    &lp->lp_actor.lip_systemid)) {
+		return;
+	}
+
+	/*
+	 * EXPIRED, DEFAULTED, CURRENT -> CURRENT
+	 */
+
+	lacp_sm_rx_update_selected(lp, du);
+	lacp_sm_rx_update_ntt(lp, du);
+	lacp_sm_rx_record_pdu(lp, du);
+
+	timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
+	    LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
+	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
+
+	lp->lp_state &= ~LACP_STATE_EXPIRED;
+
+	/*
+	 * Kick the transmit machine without waiting for the next tick.
+	 */
+
+	lacp_sm_tx(lp);
+}
+
+static void
+lacp_sm_rx_set_expired(struct lacp_port *lp)
+{
+	lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
+	lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
+	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
+	lp->lp_state |= LACP_STATE_EXPIRED;
+}
+
+static void
+lacp_sm_rx_timer(struct lacp_port *lp)
+{
+	if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
+		/* CURRENT -> EXPIRED */
+		LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
+		lacp_sm_rx_set_expired(lp);
+	} else {
+		/* EXPIRED -> DEFAULTED */
+		LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
+		lacp_sm_rx_update_default_selected(lp);
+		lacp_sm_rx_record_default(lp);
+		lp->lp_state &= ~LACP_STATE_EXPIRED;
+	}
+}
+
+static void
+lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
+{
+	boolean_t active;
+	uint8_t oldpstate;
+#if defined(LACP_DEBUG)
+	char buf[LACP_STATESTR_MAX+1];
+#endif
+
+	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+	oldpstate = lp->lp_partner.lip_state;
+
+	active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
+	    || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
+	    (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
+
+	lp->lp_partner = du->ldu_actor;
+	if (active &&
+	    ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
+	    LACP_STATE_AGGREGATION) &&
+	    !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
+	    || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
+		/* XXX nothing? */
+	} else {
+		lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
+	}
+
+	lp->lp_state &= ~LACP_STATE_DEFAULTED;
+
+	if (oldpstate != lp->lp_partner.lip_state) {
+		LACP_DPRINTF((lp, "old pstate %s\n",
+		    lacp_format_state(oldpstate, buf, sizeof(buf))));
+		LACP_DPRINTF((lp, "new pstate %s\n",
+		    lacp_format_state(lp->lp_partner.lip_state, buf,
+		    sizeof(buf))));
+	}
+
+	lacp_sm_ptx_update_timeout(lp, oldpstate);
+}
+
+static void
+lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
+{
+	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+	if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
+	    !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
+	    LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
+		LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
+		lacp_sm_assert_ntt(lp);
+	}
+}
+
+static void
+lacp_sm_rx_record_default(struct lacp_port *lp)
+{
+	uint8_t oldpstate;
+
+	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+	oldpstate = lp->lp_partner.lip_state;
+	lp->lp_partner = lacp_partner_admin;
+	lp->lp_state |= LACP_STATE_DEFAULTED;
+	lacp_sm_ptx_update_timeout(lp, oldpstate);
+}
+
+static void
+lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
+    const struct lacp_peerinfo *info)
+{
+	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+	if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
+	    !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
+	    LACP_STATE_AGGREGATION)) {
+		lp->lp_selected = LACP_UNSELECTED;
+		/* mux machine will clean up lp->lp_aggregator */
+	}
+}
+
+static void
+lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
+{
+	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+	lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
+}
+
+static void
+lacp_sm_rx_update_default_selected(struct lacp_port *lp)
+{
+	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+	lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+}
+
+/* transmit machine */
+
+static void
+lacp_sm_tx(struct lacp_port *lp)
+{
+	int error;
+
+	if (!(lp->lp_state & LACP_STATE_AGGREGATION)
+#if 1
+	    || (!(lp->lp_state & LACP_STATE_ACTIVITY)
+	    && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
+#endif
+	    ) {
+		lp->lp_flags &= ~LACP_PORT_NTT;
+	}
+
+	if (!(lp->lp_flags & LACP_PORT_NTT)) {
+		return;
+	}
+
+	/* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
+	if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
+		    (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
+		LACP_DPRINTF((lp, "rate limited pdu\n"));
+		return;
+	}
+
+	error = lacp_xmit_lacpdu(lp);
+
+	if (error == 0) {
+		lp->lp_flags &= ~LACP_PORT_NTT;
+	} else {
+		LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
+		    error));
+	}
+}
+
+static void
+lacp_sm_assert_ntt(struct lacp_port *lp)
+{
+
+	lp->lp_flags |= LACP_PORT_NTT;
+}
+
+static void
+lacp_run_timers(struct lacp_port *lp)
+{
+	int i;
+
+	for (i = 0; i < LACP_NTIMER; i++) {
+		KASSERT(lp->lp_timer[i] >= 0,
+		    ("invalid timer value %d", lp->lp_timer[i]));
+		if (lp->lp_timer[i] == 0) {
+			continue;
+		} else if (--lp->lp_timer[i] <= 0) {
+			if (lacp_timer_funcs[i]) {
+				(*lacp_timer_funcs[i])(lp);
+			}
+		}
+	}
+}
+
+int
+lacp_marker_input(struct lagg_port *lgp, struct mbuf *m)
+{
+	struct lacp_port *lp = LACP_PORT(lgp);
+	struct lacp_port *lp2;
+	struct lacp_softc *lsc = lp->lp_lsc;
+	struct markerdu *mdu;
+	int error = 0;
+	int pending = 0;
+
+	LAGG_RLOCK_ASSERT(lgp->lp_softc);
+
+	if (m->m_pkthdr.len != sizeof(*mdu)) {
+		goto bad;
+	}
+
+	if ((m->m_flags & M_MCAST) == 0) {
+		goto bad;
+	}
+
+	if (m->m_len < sizeof(*mdu)) {
+		m = m_pullup(m, sizeof(*mdu));
+		if (m == NULL) {
+			return (ENOMEM);
+		}
+	}
+
+	mdu = mtod(m, struct markerdu *);
+
+	if (memcmp(&mdu->mdu_eh.ether_dhost,
+	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
+		goto bad;
+	}
+
+	if (mdu->mdu_sph.sph_version != 1) {
+		goto bad;
+	}
+
+	switch (mdu->mdu_tlv.tlv_type) {
+	case MARKER_TYPE_INFO:
+		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
+		    marker_info_tlv_template, TRUE)) {
+			goto bad;
+		}
+		mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
+		memcpy(&mdu->mdu_eh.ether_dhost,
+		    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
+		memcpy(&mdu->mdu_eh.ether_shost,
+		    lgp->lp_lladdr, ETHER_ADDR_LEN);
+		error = lagg_enqueue(lp->lp_ifp, m);
+		break;
+
+	case MARKER_TYPE_RESPONSE:
+		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
+		    marker_response_tlv_template, TRUE)) {
+			goto bad;
+		}
+		LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n",
+		    ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system,
+		    ":", ntohl(mdu->mdu_info.mi_rq_xid)));
+
+		/* Verify that it is the last marker we sent out */
+		if (memcmp(&mdu->mdu_info, &lp->lp_marker,
+		    sizeof(struct lacp_markerinfo)))
+			goto bad;
+
+		lp->lp_flags &= ~LACP_PORT_MARK;
+
+		if (lsc->lsc_suppress_distributing) {
+			/* Check if any ports are waiting for a response */
+			LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) {
+				if (lp2->lp_flags & LACP_PORT_MARK) {
+					pending = 1;
+					break;
+				}
+			}
+
+			if (pending == 0) {
+				/* All interface queues are clear */
+				LACP_DPRINTF((NULL, "queue flush complete\n"));
+				lsc->lsc_suppress_distributing = FALSE;
+			}
+		}
+
+		m_freem(m);
+		break;
+
+	default:
+		goto bad;
+	}
+
+	return (error);
+
+bad:
+	LACP_DPRINTF((lp, "bad marker frame\n"));
+	m_freem(m);
+	return (EINVAL);
+}
+
+static int
+tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
+    const struct tlv_template *tmpl, boolean_t check_type)
+{
+	while (/* CONSTCOND */ 1) {
+		if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
+			return (EINVAL);
+		}
+		if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
+		    tlv->tlv_length != tmpl->tmpl_length) {
+			return (EINVAL);
+		}
+		if (tmpl->tmpl_type == 0) {
+			break;
+		}
+		tlv = (const struct tlvhdr *)
+		    ((const char *)tlv + tlv->tlv_length);
+		tmpl++;
+	}
+
+	return (0);
+}
+
+#if defined(LACP_DEBUG)
+const char *
+lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
+{
+	snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
+	    (int)mac[0],
+	    (int)mac[1],
+	    (int)mac[2],
+	    (int)mac[3],
+	    (int)mac[4],
+	    (int)mac[5]);
+
+	return (buf);
+}
+
+const char *
+lacp_format_systemid(const struct lacp_systemid *sysid,
+    char *buf, size_t buflen)
+{
+	char macbuf[LACP_MACSTR_MAX+1];
+
+	snprintf(buf, buflen, "%04X,%s",
+	    ntohs(sysid->lsi_prio),
+	    lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
+
+	return (buf);
+}
+
+const char *
+lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
+{
+	snprintf(buf, buflen, "%04X,%04X",
+	    ntohs(portid->lpi_prio),
+	    ntohs(portid->lpi_portno));
+
+	return (buf);
+}
+
+const char *
+lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
+{
+	char sysid[LACP_SYSTEMIDSTR_MAX+1];
+	char portid[LACP_PORTIDSTR_MAX+1];
+
+	snprintf(buf, buflen, "(%s,%04X,%s)",
+	    lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
+	    ntohs(peer->lip_key),
+	    lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
+
+	return (buf);
+}
+
+const char *
+lacp_format_lagid(const struct lacp_peerinfo *a,
+    const struct lacp_peerinfo *b, char *buf, size_t buflen)
+{
+	char astr[LACP_PARTNERSTR_MAX+1];
+	char bstr[LACP_PARTNERSTR_MAX+1];
+
+#if 0
+	/*
+	 * There is a convention to display the lower-numbered peer
+	 * on the left.
+	 */
+
+	if (lacp_compare_peerinfo(a, b) > 0) {
+		const struct lacp_peerinfo *t;
+
+		t = a;
+		a = b;
+		b = t;
+	}
+#endif
+
+	snprintf(buf, buflen, "[%s,%s]",
+	    lacp_format_partner(a, astr, sizeof(astr)),
+	    lacp_format_partner(b, bstr, sizeof(bstr)));
+
+	return (buf);
+}
+
+const char *
+lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
+    char *buf, size_t buflen)
+{
+	if (la == NULL) {
+		return ("(none)");
+	}
+
+	return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
+}
+
+const char *
+lacp_format_state(uint8_t state, char *buf, size_t buflen)
+{
+	snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
+	return (buf);
+}
+
+static void
+lacp_dump_lacpdu(const struct lacpdu *du)
+{
+	char buf[LACP_PARTNERSTR_MAX+1];
+	char buf2[LACP_STATESTR_MAX+1];
+
+	printf("actor=%s\n",
+	    lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
+	printf("actor.state=%s\n",
+	    lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
+	printf("partner=%s\n",
+	    lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
+	printf("partner.state=%s\n",
+	    lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
+
+	printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
+}
+
+static void
+lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
+{
+	va_list va;
+
+	if (lp) {
+		printf("%s: ", lp->lp_ifp->if_xname);
+	}
+
+	va_start(va, fmt);
+	vprintf(fmt, va);
+	va_end(va);
+}
+#endif
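
For illustration, here is a minimal user-space sketch of the key layout used by
lacp_compose_key() above; the if_index and media subtype values are made up, and
the snippet only mirrors the bit packing, not the kernel code itself:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>		/* htons(), ntohs() */

/* Mirror of the bit layout in lacp_compose_key(); example values only. */
static uint16_t
compose_key(int aggregatable, uint16_t port_ifindex, uint16_t subtype,
    uint16_t lagg_ifindex)
{
	uint16_t key;

	if (!aggregatable) {
		key = port_ifindex;		/* bits 0..14: port if_index */
		key |= 0x8000;			/* bit 15: set */
	} else {
		key = subtype;				/* bits 0..4: media subtype */
		key |= 0x7fe0 & (lagg_ifindex << 5);	/* bits 5..14: lagg if_index */
							/* bit 15: clear */
	}
	return (htons(key));
}

int
main(void)
{
	/* non-aggregatable port with if_index 3 -> key 0x8003 */
	printf("0x%04x\n", ntohs(compose_key(0, 3, 0, 0)));
	/* hypothetical subtype 0x16 on a lagg with if_index 2 -> key 0x0056 */
	printf("0x%04x\n", ntohs(compose_key(1, 0, 0x16, 2)));
	return (0);
}
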
Index: if_loop.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_loop.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/if_loop.c -L sys/net/if_loop.c -u -r1.2 -r1.3
--- sys/net/if_loop.c
+++ sys/net/if_loop.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_loop.c	8.2 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/net/if_loop.c,v 1.106.2.1 2005/08/25 05:01:20 rwatson Exp $
+ * $FreeBSD: src/sys/net/if_loop.c,v 1.112 2007/02/09 00:09:35 cognet Exp $
  */
 
 /*
@@ -87,7 +87,7 @@
 #elif defined(LARGE_LOMTU)
 #define LOMTU	131072
 #else
-#define LOMTU	32768
+#define LOMTU	16384
 #endif
 
 #define LONAME	"lo"
@@ -101,7 +101,7 @@
 static void	lortrequest(int, struct rtentry *, struct rt_addrinfo *);
 int		looutput(struct ifnet *ifp, struct mbuf *m,
 		    struct sockaddr *dst, struct rtentry *rt);
-static int	lo_clone_create(struct if_clone *, int);
+static int	lo_clone_create(struct if_clone *, int, caddr_t);
 static void	lo_clone_destroy(struct ifnet *);
 
 struct ifnet *loif = NULL;			/* Used externally */
@@ -134,9 +134,10 @@
 }
 
 static int
-lo_clone_create(ifc, unit)
+lo_clone_create(ifc, unit, params)
 	struct if_clone *ifc;
 	int unit;
+	caddr_t params;
 {
 	struct ifnet *ifp;
 	struct lo_softc *sc;
@@ -258,24 +259,42 @@
 	m_tag_delete_nonpersistent(m);
 	m->m_pkthdr.rcvif = ifp;
 
-	/* Let BPF see incoming packet */
-	if (ifp->if_bpf) {
-		if (ifp->if_bpf->bif_dlt == DLT_NULL) {
-			u_int32_t af1 = af;	/* XXX beware sizeof(af) != 4 */
-			/*
-			 * We need to prepend the address family.
-			 */
-			bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
-		} else
+	/*
+	 * Let BPF see the incoming packet in the following manner:
+	 *  - Emulated packet loopback for a simplex interface
+	 *    (net/if_ethersubr.c)
+	 *	-> passed to ifp's BPF
+	 *  - IPv4/v6 multicast packet loopback (netinet(6)/ip(6)_output.c)
+	 *	-> not passed to any BPF
+	 *  - Normal packet loopback from myself to myself (net/if_loop.c)
+	 *	-> passed to lo0's BPF (even for IPv6, where ifp != lo0)
+	 */
+	if (hlen > 0) {
+		if (bpf_peers_present(ifp->if_bpf)) {
 			bpf_mtap(ifp->if_bpf, m);
+		}
+	} else {
+		if (bpf_peers_present(loif->if_bpf)) {
+			if ((m->m_flags & M_MCAST) == 0 || loif == ifp) {
+				/* XXX beware sizeof(af) != 4 */
+				u_int32_t af1 = af;	
+
+				/*
+				 * We need to prepend the address family.
+				 */
+				bpf_mtap2(loif->if_bpf, &af1, sizeof(af1), m);
+			}
+		}
 	}
 
 	/* Strip away media header */
 	if (hlen > 0) {
 		m_adj(m, hlen);
-#if defined(__alpha__) || defined(__ia64__) || defined(__sparc64__)
-		/* The alpha doesn't like unaligned data.
-		 * We move data down in the first mbuf */
+#ifndef __NO_STRICT_ALIGNMENT
+		/*
+		 * Some archs do not like unaligned data, so
+		 * we move data down in the first mbuf.
+		 */
 		if (mtod(m, vm_offset_t) & 3) {
 			KASSERT(hlen >= 3, ("if_simloop: hlen too small"));
 			bcopy(m->m_data, 
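
As the loopback BPF comment above notes, a self-addressed packet is handed to
lo0's BPF with a 4-byte address family prepended (the bpf_mtap2() call with af1).
That prefix, in host byte order, is exactly what a DLT_NULL capture on lo0
contains; a rough libpcap sketch of reading such a capture (the capture file
name is hypothetical):

#include <sys/types.h>
#include <pcap/pcap.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char errbuf[PCAP_ERRBUF_SIZE];
	struct pcap_pkthdr hdr;
	const u_char *pkt;
	pcap_t *p;

	p = pcap_open_offline("lo0.pcap", errbuf);	/* hypothetical file */
	if (p == NULL || pcap_datalink(p) != DLT_NULL)
		return (1);
	while ((pkt = pcap_next(p, &hdr)) != NULL) {
		u_int32_t af;

		if (hdr.caplen < sizeof(af))
			continue;
		memcpy(&af, pkt, sizeof(af));	/* address family, host order */
		printf("af=%u len=%u\n", af, hdr.len);
	}
	pcap_close(p);
	return (0);
}
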
Index: if_pppvar.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_pppvar.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_pppvar.h -L sys/net/if_pppvar.h -u -r1.1.1.1 -r1.2
--- sys/net/if_pppvar.h
+++ sys/net/if_pppvar.h
@@ -40,7 +40,7 @@
  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
- * $FreeBSD: src/sys/net/if_pppvar.h,v 1.23.2.1 2005/08/19 14:52:21 glebius Exp $
+ * $FreeBSD: src/sys/net/if_pppvar.h,v 1.26 2006/12/05 18:54:21 ume Exp $
  */
 
 /*
@@ -48,7 +48,8 @@
  * indexing sc_npmode.
  */
 #define NP_IP	0		/* Internet Protocol */
-#define NUM_NP	1		/* Number of NPs. */
+#define NP_IPV6	1		/* Internet Protocol version 6 */
+#define NUM_NP	2		/* Number of NPs. */
 
 /*
  * Structure describing each ppp unit.
@@ -56,7 +57,7 @@
 struct ppp_softc {
 	struct	ifnet *sc_ifp;		/* network-visible interface */
 /*hi*/	u_int	sc_flags;		/* control/status bits; see if_ppp.h */
-	struct	callout_handle sc_ch;	/* Used for scheduling timeouts */
+	struct	callout sc_timo_ch;	/* Used for scheduling timeouts */
 	void	*sc_devp;		/* pointer to device-dep structure */
 	void	(*sc_start)(struct ppp_softc *);	/* start output proc */
 	void	(*sc_ctlp)(struct ppp_softc *); /* rcvd control pkt */
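
The replacement of the callout_handle with a struct callout in ppp_softc above
follows the usual timeout(9) to callout(9) conversion. Call sites change shape
roughly as sketched below; ppp_timeout_fn is a placeholder name, and the real
if_ppp.c call sites are not reproduced in this mail:

	/* old timeout(9) style, using the struct callout_handle */
	sc->sc_ch = timeout(ppp_timeout_fn, sc, hz);
	untimeout(ppp_timeout_fn, sc, sc->sc_ch);

	/* new callout(9) style, using the struct callout member */
	callout_init(&sc->sc_timo_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sc_timo_ch, hz, ppp_timeout_fn, sc);
	callout_stop(&sc->sc_timo_ch);
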
Index: pfil.h
===================================================================
RCS file: /home/cvs/src/sys/net/pfil.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/pfil.h -L sys/net/pfil.h -u -r1.1.1.1 -r1.2
--- sys/net/pfil.h
+++ sys/net/pfil.h
@@ -1,4 +1,4 @@
-/*	$FreeBSD: src/sys/net/pfil.h,v 1.13 2005/01/07 01:45:35 imp Exp $ */
+/*	$FreeBSD: src/sys/net/pfil.h,v 1.16 2007/06/08 12:43:25 gallatin Exp $ */
 /*	$NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $	*/
 
 /*-
@@ -36,7 +36,8 @@
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
-#include <sys/condvar.h>	/* XXX */
+#include <sys/lock.h>
+#include <sys/rwlock.h>
 
 struct mbuf;
 struct ifnet;
@@ -67,14 +68,8 @@
 	pfil_list_t	ph_in;
 	pfil_list_t	ph_out;
 	int		ph_type;
-	/*
-	 * Locking: use a busycounter per pfil_head.
-	 * Use ph_busy_count = -1 to indicate pfil_head is empty.
-	 */
-	int		ph_busy_count;	/* count of threads with read lock */
-	int		ph_want_write;	/* want write lock flag */
-	struct cv	ph_cv;		/* for waking up writers */
-	struct mtx	ph_mtx;		/* mutex on locking state */
+	int		ph_nhooks;
+	struct rwlock	ph_mtx;
 	union {
 		u_long		phu_val;
 		void		*phu_ptr;
@@ -97,11 +92,17 @@
 
 struct pfil_head *pfil_head_get(int, u_long);
 
+#define	PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
+#define PFIL_RLOCK(p) rw_rlock(&(p)->ph_mtx)
+#define PFIL_WLOCK(p) rw_wlock(&(p)->ph_mtx)
+#define PFIL_RUNLOCK(p) rw_runlock(&(p)->ph_mtx)
+#define PFIL_WUNLOCK(p) rw_wunlock(&(p)->ph_mtx)
+#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
+#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
+
 static __inline struct packet_filter_hook *
 pfil_hook_get(int dir, struct pfil_head *ph)
 {
-	KASSERT(ph->ph_busy_count > 0, 
-	    ("pfil_hook_get: called on unbusy pfil_head"));
 	if (dir == PFIL_IN)
 		return (TAILQ_FIRST(&ph->ph_in));
 	else if (dir == PFIL_OUT)
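
With the busy-counter scheme replaced by an rwlock, callers are expected to test
PFIL_HOOKED() before running the hook chain. A typical packet-path fragment looks
roughly like this; pfil_run_hooks() and inet_pfil_hook exist elsewhere in the
tree, while the surrounding error handling here is only a sketch:

	if (PFIL_HOOKED(&inet_pfil_hook)) {
		error = pfil_run_hooks(&inet_pfil_hook, &m, ifp,
		    PFIL_OUT, inp);
		if (error != 0 || m == NULL)
			goto done;	/* packet rejected or consumed */
	}
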
Index: if_vlan_var.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_vlan_var.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_vlan_var.h -L sys/net/if_vlan_var.h -u -r1.1.1.2 -r1.2
--- sys/net/if_vlan_var.h
+++ sys/net/if_vlan_var.h
@@ -26,7 +26,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_vlan_var.h,v 1.21.2.2 2006/01/13 19:21:45 glebius Exp $
+ * $FreeBSD: src/sys/net/if_vlan_var.h,v 1.26 2007/02/28 22:05:30 bms Exp $
  */
 
 #ifndef _NET_IF_VLAN_VAR_H_
@@ -40,9 +40,40 @@
 	u_int16_t evl_proto;
 };
 
-#define EVL_VLID_MASK	0x0FFF
-#define	EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK)
-#define	EVL_PRIOFTAG(tag) (((tag) >> 13) & 7)
+#define	EVL_VLID_MASK		0x0FFF
+#define	EVL_PRI_MASK		0xE000
+#define	EVL_VLANOFTAG(tag)	((tag) & EVL_VLID_MASK)
+#define	EVL_PRIOFTAG(tag)	(((tag) >> 13) & 7)
+#define	EVL_CFIOFTAG(tag)	(((tag) >> 12) & 1)
+#define	EVL_MAKETAG(vlid, pri, cfi)					\
+	((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))
+
+/* Set the VLAN ID in an mbuf packet header non-destructively. */
+#define EVL_APPLY_VLID(m, vlid)						\
+	do {								\
+		if ((m)->m_flags & M_VLANTAG) {				\
+			(m)->m_pkthdr.ether_vtag &= EVL_VLID_MASK;	\
+			(m)->m_pkthdr.ether_vtag |= (vlid);		\
+		} else {						\
+			(m)->m_pkthdr.ether_vtag = (vlid);		\
+			(m)->m_flags |= M_VLANTAG;			\
+		}							\
+	} while (0)
+
+/* Set the priority ID in an mbuf packet header non-destructively. */
+#define EVL_APPLY_PRI(m, pri)						\
+	do {								\
+		if ((m)->m_flags & M_VLANTAG) {				\
+			uint16_t __vlantag = (m)->m_pkthdr.ether_vtag;	\
+			(m)->m_pkthdr.ether_vtag |= EVL_MAKETAG(	\
+			    EVL_VLANOFTAG(__vlantag), (pri),		\
+			    EVL_CFIOFTAG(__vlantag));			\
+		} else {						\
+			(m)->m_pkthdr.ether_vtag =			\
+			    EVL_MAKETAG(0, (pri), 0);			\
+			(m)->m_flags |= M_VLANTAG;			\
+		}							\
+	} while (0)
 
 /* sysctl(3) tags, for compatibility purposes */
 #define	VLANCTL_PROTO	1
@@ -70,77 +101,37 @@
  */
 
 /*
- * Drivers that support hardware VLAN tagging pass a packet's tag
- * up through the stack by appending a packet tag with this value.
- * Output is handled likewise, the driver must locate the packet
- * tag to extract the VLAN tag.  The following macros are used to
- * do this work.  On input, do:
+ * VLAN tags are stored in host byte order.  Byte swapping may be
+ * necessary.
  *
- *	VLAN_INPUT_TAG(ifp, m, tag,);
+ * Drivers that support hardware VLAN tag stripping store the
+ * received VLAN tag (containing both VLAN and priority information)
+ * in the ether_vtag mbuf packet header field:
+ * 
+ *	m->m_pkthdr.ether_vtag = vlan_id;	// ntohs()?
+ *	m->m_flags |= M_VLANTAG;
  *
- * to mark the packet m with the specified VLAN tag.  The last
- * parameter provides code to execute in case of an error.  On
- * output the driver should check mbuf to see if a VLAN tag is
- * present and only then check for a tag; this is done with:
+ * to mark the packet m with the specified VLAN tag.
  *
- *	struct m_tag *mtag;
- *	mtag = VLAN_OUTPUT_TAG(ifp, m);
- *	if (mtag != NULL) {
- *		... = VLAN_TAG_VALUE(mtag);
+ * On output the driver should check the mbuf for the M_VLANTAG
+ * flag to see if a VLAN tag is present and valid:
+ *
+ *	if (m->m_flags & M_VLANTAG) {
+ *		... = m->m_pkthdr.ether_vtag;	// htons()?
  *		... pass tag to hardware ...
  *	}
  *
  * Note that a driver must indicate it supports hardware VLAN
- * tagging by marking IFCAP_VLAN_HWTAGGING in if_capabilities.
- */
-#define	MTAG_VLAN	1035328035
-#define	MTAG_VLAN_TAG	0		/* tag of VLAN interface */
-
-/*
- * This macro must expand to a lvalue so that it can be used
- * to set a tag with a simple assignment.
- */
-#define	VLAN_TAG_VALUE(_mt)	(*(u_int *)((_mt) + 1))
-
-/*
- * This macro is kept for API compatibility. 
+ * stripping/insertion by marking IFCAP_VLAN_HWTAGGING in
+ * if_capabilities.
  */
-#define	VLAN_INPUT_TAG(_ifp, _m, _t, _errcase) do {	\
-	struct m_tag *mtag;				\
-	mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG,	\
-	    sizeof (u_int), M_NOWAIT);			\
-	if (mtag != NULL) {				\
-		VLAN_TAG_VALUE(mtag) = (_t);		\
-		m_tag_prepend((_m), mtag);		\
-		(_m)->m_flags |= M_VLANTAG;		\
-	} else {					\
-		(_ifp)->if_ierrors++;			\
-		m_freem(_m);				\
-		_errcase;				\
-	}						\
-} while (0)
 
-/*
- * This macro is equal to VLAN_INPUT_TAG() in HEAD.
- */
-#define	VLAN_INPUT_TAG_NEW(_ifp, _m, _t) do {			\
-	struct m_tag *mtag;					\
-	mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG,		\
-			   sizeof (u_int), M_NOWAIT);		\
-	if (mtag != NULL) {					\
-		VLAN_TAG_VALUE(mtag) = (_t);			\
-		m_tag_prepend((_m), mtag);			\
-		(_m)->m_flags |= M_VLANTAG;			\
-	} else {						\
-		(_ifp)->if_ierrors++;				\
-		m_freem(_m);					\
-		_m = NULL;					\
-	}							\
+#define	VLAN_CAPABILITIES(_ifp) do {				\
+	if ((_ifp)->if_vlantrunk != NULL) 			\
+		(*vlan_trunk_cap_p)(_ifp);			\
 } while (0)
 
-#define	VLAN_OUTPUT_TAG(_ifp, _m)				\
-	((_m)->m_flags & M_VLANTAG ?				\
-		m_tag_locate((_m), MTAG_VLAN, MTAG_VLAN_TAG, NULL) : NULL)
+extern	void (*vlan_trunk_cap_p)(struct ifnet *);
 #endif /* _KERNEL */
 
 #endif /* _NET_IF_VLAN_VAR_H_ */
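
The rewritten comment above retires the m_tag based VLAN_INPUT_TAG/VLAN_OUTPUT_TAG
interface in favour of the ether_vtag packet-header field plus the M_VLANTAG flag.
Condensed into a driver-side sketch (the rxd_*/txd_* descriptor names are
hypothetical), both directions look like this:

	/* receive: hardware already stripped the tag into the RX descriptor */
	if (rxd_has_vlan) {
		m->m_pkthdr.ether_vtag = rxd_vlan_tci;	/* VLAN id + priority */
		m->m_flags |= M_VLANTAG;
	}

	/* transmit: hand the tag back to the hardware if one is present */
	if (m->m_flags & M_VLANTAG) {
		txd_vlan_tci = m->m_pkthdr.ether_vtag;
		txd_insert_vlan = 1;
	}
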
Index: route.h
===================================================================
RCS file: /home/cvs/src/sys/net/route.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/route.h -L sys/net/route.h -u -r1.2 -r1.3
--- sys/net/route.h
+++ sys/net/route.h
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)route.h	8.4 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/net/route.h,v 1.63.2.1 2006/04/04 20:07:23 andre Exp $
+ * $FreeBSD: src/sys/net/route.h,v 1.65.4.1 2008/01/09 15:23:36 mux Exp $
  */
 
 #ifndef _NET_ROUTE_H_
@@ -289,6 +289,7 @@
 #define	RT_LOCK_INIT(_rt) \
 	mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
 #define	RT_LOCK(_rt)		mtx_lock(&(_rt)->rt_mtx)
+#define	RT_TRYLOCK(_rt)		mtx_trylock(&(_rt)->rt_mtx)
 #define	RT_UNLOCK(_rt)		mtx_unlock(&(_rt)->rt_mtx)
 #define	RT_LOCK_DESTROY(_rt)	mtx_destroy(&(_rt)->rt_mtx)
 #define	RT_LOCK_ASSERT(_rt)	mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
@@ -298,13 +299,13 @@
 	KASSERT((_rt)->rt_refcnt >= 0,				\
 		("negative refcnt %ld", (_rt)->rt_refcnt));	\
 	(_rt)->rt_refcnt++;					\
-} while (0);
+} while (0)
 #define	RT_REMREF(_rt)	do {					\
 	RT_LOCK_ASSERT(_rt);					\
 	KASSERT((_rt)->rt_refcnt > 0,				\
 		("bogus refcnt %ld", (_rt)->rt_refcnt));	\
 	(_rt)->rt_refcnt--;					\
-} while (0);
+} while (0)
 
 #define	RTFREE_LOCKED(_rt) do {					\
 		if ((_rt)->rt_refcnt <= 1)			\
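
Dropping the semicolons after "} while (0)" in RT_ADDREF and RT_REMREF is not
cosmetic: with the semicolon baked into the macro, an invocation followed by its
own semicolon expands to two statements, which detaches a following else clause.
The classic failure case:

	if (need_ref)
		RT_ADDREF(rt);	/* old macro expanded to "do { ... } while (0);;" */
	else			/* error: else without a matching if */
		RT_REMREF(rt);
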
Index: if_clone.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_clone.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_clone.h -L sys/net/if_clone.h -u -r1.1.1.1 -r1.2
--- sys/net/if_clone.h
+++ sys/net/if_clone.h
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	From: @(#)if.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_clone.h,v 1.2 2005/01/07 01:45:34 imp Exp $
+ * $FreeBSD: src/sys/net/if_clone.h,v 1.4 2006/07/09 06:04:00 sam Exp $
  */
 
 #ifndef	_NET_IF_CLONE_H_
@@ -61,18 +61,19 @@
 	/* (c) Driver specific cloning functions.  Called with no locks held. */
 	void	(*ifc_attach)(struct if_clone *);
 	int	(*ifc_match)(struct if_clone *, const char *);
-	int	(*ifc_create)(struct if_clone *, char *, size_t);
+	int	(*ifc_create)(struct if_clone *, char *, size_t, caddr_t);
 	int	(*ifc_destroy)(struct if_clone *, struct ifnet *);
 
 	long ifc_refcnt;		/* (i) Reference count. */
 	struct mtx ifc_mtx;		/* Mutex to protect members. */
+	LIST_HEAD(, ifnet) ifc_iflist;	/* (i) List of cloned interfaces */
 };
 
 void	if_clone_init(void);
 void	if_clone_attach(struct if_clone *);
 void	if_clone_detach(struct if_clone *);
 
-int	if_clone_create(char *, size_t);
+int	if_clone_create(char *, size_t, caddr_t);
 int	if_clone_destroy(const char *);
 int	if_clone_list(struct if_clonereq *);
 
@@ -88,7 +89,7 @@
 struct ifc_simple_data {
 	int ifcs_minifs;		/* minimum number of interfaces */
 
-	int	(*ifcs_create)(struct if_clone *, int);
+	int	(*ifcs_create)(struct if_clone *, int, caddr_t);
 	void	(*ifcs_destroy)(struct ifnet *);
 };
 
@@ -105,7 +106,7 @@
 
 void	ifc_simple_attach(struct if_clone *);
 int	ifc_simple_match(struct if_clone *, const char *);
-int	ifc_simple_create(struct if_clone *, char *, size_t);
+int	ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
 int	ifc_simple_destroy(struct if_clone *, struct ifnet *);
 
 #endif /* _KERNEL */
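
As the prototypes above show, the cloner create hooks gained an opaque caddr_t
argument for optional creation-time parameters.  A minimal sketch of a simple
cloner adapted to the new signature (all "foo" names are hypothetical and the
driver-specific softc setup is elided):

	static const u_char foo_lladdr[ETHER_ADDR_LEN] =
	    { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };	/* example MAC */

	static int
	foo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
	{
		struct ifnet *ifp;

		/* params may be NULL when the interface is created by name only */
		ifp = if_alloc(IFT_ETHER);
		if (ifp == NULL)
			return (ENOSPC);
		if_initname(ifp, ifc->ifc_name, unit);
		ether_ifattach(ifp, foo_lladdr);
		return (0);
	}

This would be paired with a foo_clone_destroy() and IFC_SIMPLE_DECLARE(foo, 0)
as before; callers that have nothing to pass simply hand in NULL, as the
if_ppp.c hunk below does with "(caddr_t) 0".
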
Index: radix.c
===================================================================
RCS file: /home/cvs/src/sys/net/radix.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/radix.c -L sys/net/radix.c -u -r1.1.1.2 -r1.2
--- sys/net/radix.c
+++ sys/net/radix.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)radix.c	8.5 (Berkeley) 5/19/95
- * $FreeBSD: src/sys/net/radix.c,v 1.37.2.1 2006/02/16 01:10:44 qingli Exp $
+ * $FreeBSD: src/sys/net/radix.c,v 1.38 2006/02/07 20:25:39 qingli Exp $
  */
 
 /*
Index: if_fwsubr.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_fwsubr.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_fwsubr.c -L sys/net/if_fwsubr.c -u -r1.1.1.2 -r1.2
--- sys/net/if_fwsubr.c
+++ sys/net/if_fwsubr.c
@@ -27,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_fwsubr.c,v 1.12.2.5 2006/01/24 06:30:51 brooks Exp $
+ * $FreeBSD: src/sys/net/if_fwsubr.c,v 1.24 2007/06/05 14:15:45 simokawa Exp $
  */
 
 #include "opt_inet.h"
@@ -37,7 +37,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
@@ -62,6 +61,8 @@
 #include <netinet6/nd6.h>
 #endif
 
+#include <security/mac/mac_framework.h>
+
 MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals");
 
 struct fw_hwaddr firewire_broadcastaddr = {
@@ -190,7 +191,7 @@
 	/*
 	 * Let BPF tap off a copy before we encapsulate.
 	 */
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		if (unicast)
 			bcopy(destfw, h.firewire_dhost, 8);
@@ -337,8 +338,6 @@
 	int fstart, fend, start, end, islast;
 	uint32_t id;
 
-	GIANT_REQUIRED;
-
 	/*
 	 * Find an existing reassembly buffer or create a new one.
 	 */
@@ -501,8 +500,6 @@
 	union fw_encap *enc;
 	int type, isr;
 
-	GIANT_REQUIRED;
-
 	/*
 	 * The caller has already stripped off the packet header
 	 * (stream or wreqb) and marked the mbuf's M_BCAST flag
@@ -516,6 +513,8 @@
 	}
 
 	m = m_pullup(m, sizeof(uint32_t));
+	if (m == NULL)
+		return;
 	enc = mtod(m, union fw_encap *);
 
 	/*
@@ -565,7 +564,7 @@
 	 * Give bpf a chance at the packet. The link-level driver
 	 * should have left us a tag with the EUID of the sender.
 	 */
-	if (ifp->if_bpf) {
+	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		struct m_tag *mtag;
 
@@ -601,7 +600,7 @@
 	switch (type) {
 #ifdef INET
 	case ETHERTYPE_IP:
-		if (ip_fastforward(m))
+		if ((m = ip_fastforward(m)) == NULL)
 			return;
 		isr = NETISR_IP;
 		break;
@@ -764,7 +763,7 @@
 	ifp->if_resolvemulti = firewire_resolvemulti;
 	ifp->if_broadcastaddr = (u_char *) &firewire_broadcastaddr;
 
-	ifa = ifaddr_byindex(ifp->if_index);
+	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_IEEE1394;
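
Two patterns recur in this file and in later hunks: m_pullup() can free the
chain and return NULL, so its result must be re-tested before use, and
ip_fastforward() now hands back the (possibly replaced) mbuf rather than an
int, returning NULL once it has consumed the packet.  A condensed sketch of
the defensive idiom the new checks follow (illustrative only):

	m = m_pullup(m, sizeof(uint32_t));
	if (m == NULL)
		return;			/* chain already freed by m_pullup() */
	enc = mtod(m, union fw_encap *);

	if ((m = ip_fastforward(m)) == NULL)
		return;			/* consumed: forwarded or dropped */
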
Index: if_tap.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_tap.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_tap.h -L sys/net/if_tap.h -u -r1.1.1.2 -r1.2
--- sys/net/if_tap.h
+++ sys/net/if_tap.h
@@ -31,7 +31,7 @@
  */
 
 /*
- * $FreeBSD: src/sys/net/if_tap.h,v 1.2 2005/01/07 01:45:34 imp Exp $
+ * $FreeBSD: src/sys/net/if_tap.h,v 1.3 2006/09/27 19:57:01 ru Exp $
  * $Id: if_tap.h,v 0.7 2000/07/12 04:12:51 max Exp $
  */
 
@@ -57,7 +57,7 @@
 #define	TAPGIFINFO		_IOR('t', 92, struct tapinfo)
 
 /* VMware ioctl's */
-#define VMIO_SIOCSIFFLAGS	_IO('V', 0)
+#define VMIO_SIOCSIFFLAGS	_IOWINT('V', 0)
 #define VMIO_SIOCSKEEP		_IO('V', 1)
 #define VMIO_SIOCSIFBR		_IO('V', 2)
 #define VMIO_SIOCSLADRF		_IO('V', 3)
Index: if_iso88025subr.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_iso88025subr.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/if_iso88025subr.c -L sys/net/if_iso88025subr.c -u -r1.1.1.1 -r1.2
--- sys/net/if_iso88025subr.c
+++ sys/net/if_iso88025subr.c
@@ -30,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net/if_iso88025subr.c,v 1.67.2.3 2005/08/25 05:01:20 rwatson Exp $
+ * $FreeBSD: src/sys/net/if_iso88025subr.c,v 1.75 2006/10/22 11:52:15 rwatson Exp $
  *
  */
 
@@ -48,7 +48,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
-#include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
@@ -79,6 +78,8 @@
 #include <netipx/ipx_if.h>
 #endif
 
+#include <security/mac/mac_framework.h>
+
 static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -91,7 +92,7 @@
  * Perform common duties while attaching to interface list
  */
 void
-iso88025_ifattach(struct ifnet *ifp, int bpf)
+iso88025_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf)
 {
     struct ifaddr *ifa;
     struct sockaddr_dl *sdl;
@@ -114,16 +115,13 @@
     if (ifp->if_mtu == 0)
         ifp->if_mtu = ISO88025_DEFAULT_MTU;
 
-    ifa = ifaddr_byindex(ifp->if_index);
-    if (ifa == 0) {
-        if_printf(ifp, "%s() no lladdr!\n", __func__);
-        return;
-    }
+    ifa = ifp->if_addr;
+    KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 
     sdl = (struct sockaddr_dl *)ifa->ifa_addr;
     sdl->sdl_type = IFT_ISO88025;
     sdl->sdl_alen = ifp->if_addrlen;
-    bcopy(IFP2ENADDR(ifp), LLADDR(sdl), ifp->if_addrlen);
+    bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
     if (bpf)
         bpfattach(ifp, DLT_IEEE802, ISO88025_HDR_LEN);
@@ -181,10 +179,10 @@
 
 				if (ipx_nullhost(*ina))
 					ina->x_host = *(union ipx_host *)
-							IFP2ENADDR(ifp);
+							IF_LLADDR(ifp);
 				else
 					bcopy((caddr_t) ina->x_host.c_host,
-					      (caddr_t) IFP2ENADDR(ifp),
+					      (caddr_t) IF_LLADDR(ifp),
 					      ISO88025_ADDR_LEN);
 
 				/*
@@ -204,7 +202,7 @@
                         struct sockaddr *sa;
 
                         sa = (struct sockaddr *) & ifr->ifr_data;
-                        bcopy(IFP2ENADDR(ifp),
+                        bcopy(IF_LLADDR(ifp),
                               (caddr_t) sa->sa_data, ISO88025_ADDR_LEN);
                 }
                 break;
@@ -274,7 +272,7 @@
 	/* Generate a generic 802.5 header for the packet */
 	gen_th.ac = TR_AC;
 	gen_th.fc = TR_LLC_FRAME;
-	(void)memcpy((caddr_t)gen_th.iso88025_shost, IFP2ENADDR(ifp),
+	(void)memcpy((caddr_t)gen_th.iso88025_shost, IF_LLADDR(ifp),
 		     ISO88025_ADDR_LEN);
 	if (rif_len) {
 		gen_th.iso88025_shost[0] |= TR_RII;
@@ -520,7 +518,7 @@
 	 */
 	if ((ifp->if_flags & IFF_PROMISC) &&
 	    ((th->iso88025_dhost[0] & 1) == 0) &&
-	     (bcmp(IFP2ENADDR(ifp), (caddr_t) th->iso88025_dhost,
+	     (bcmp(IF_LLADDR(ifp), (caddr_t) th->iso88025_dhost,
 	     ISO88025_ADDR_LEN) != 0))
 		goto dropanyway;
 
@@ -586,7 +584,7 @@
 #ifdef INET
 		case ETHERTYPE_IP:
 			th->iso88025_shost[0] &= ~(TR_RII); 
-			if (ip_fastforward(m))
+			if ((m = ip_fastforward(m)) == NULL)
 				return;
 			isr = NETISR_IP;
 			break;
@@ -650,7 +648,7 @@
 			l->llc_dsap = l->llc_ssap;
 			l->llc_ssap = c;
 			if (m->m_flags & (M_BCAST | M_MCAST))
-				bcopy((caddr_t)IFP2ENADDR(ifp),
+				bcopy((caddr_t)IF_LLADDR(ifp),
 				      (caddr_t)th->iso88025_dhost,
 					ISO88025_ADDR_LEN);
 			sa.sa_family = AF_UNSPEC;
--- /dev/null
+++ sys/net/if_lagg.h
@@ -0,0 +1,243 @@
+/*	$OpenBSD: if_trunk.h,v 1.11 2007/01/31 06:20:19 reyk Exp $	*/
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk at openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * $FreeBSD: src/sys/net/if_lagg.h,v 1.10.2.1 2007/10/18 01:28:30 thompsa Exp $
+ */
+
+#ifndef _NET_LAGG_H
+#define _NET_LAGG_H
+
+/*
+ * Global definitions
+ */
+
+#define	LAGG_MAX_PORTS		32	/* logically */
+#define	LAGG_MAX_NAMESIZE	32	/* name of a protocol */
+#define	LAGG_MAX_STACKING	4	/* maximum number of stacked laggs */
+
+/* Port flags */
+#define	LAGG_PORT_SLAVE		0x00000000	/* normal enslaved port */
+#define	LAGG_PORT_MASTER	0x00000001	/* primary port */
+#define	LAGG_PORT_STACK		0x00000002	/* stacked lagg port */
+#define	LAGG_PORT_ACTIVE	0x00000004	/* port is active */
+#define	LAGG_PORT_COLLECTING	0x00000008	/* port is receiving frames */
+#define	LAGG_PORT_DISTRIBUTING	0x00000010	/* port is sending frames */
+#define	LAGG_PORT_DISABLED	0x00000020	/* port is disabled */
+#define	LAGG_PORT_BITS		"\20\01MASTER\02STACK\03ACTIVE\04COLLECTING" \
+				  "\05DISTRIBUTING\06DISABLED"
+
+/* Supported lagg PROTOs */
+#define	LAGG_PROTO_NONE		0	/* no lagg protocol defined */
+#define	LAGG_PROTO_ROUNDROBIN	1	/* simple round robin */
+#define	LAGG_PROTO_FAILOVER	2	/* active failover */
+#define	LAGG_PROTO_LOADBALANCE	3	/* loadbalance */
+#define	LAGG_PROTO_LACP		4	/* 802.3ad lacp */
+#define	LAGG_PROTO_ETHERCHANNEL	5	/* Cisco FEC */
+#define	LAGG_PROTO_MAX		6
+
+struct lagg_protos {
+	const char		*lpr_name;
+	int			lpr_proto;
+};
+
+#define	LAGG_PROTO_DEFAULT	LAGG_PROTO_FAILOVER
+#define LAGG_PROTOS	{						\
+	{ "failover",		LAGG_PROTO_FAILOVER },			\
+	{ "fec",		LAGG_PROTO_ETHERCHANNEL },		\
+	{ "lacp",		LAGG_PROTO_LACP },			\
+	{ "loadbalance",	LAGG_PROTO_LOADBALANCE },		\
+	{ "roundrobin",		LAGG_PROTO_ROUNDROBIN },		\
+	{ "none",		LAGG_PROTO_NONE },			\
+	{ "default",		LAGG_PROTO_DEFAULT }			\
+}
+
+/*
+ * lagg ioctls.
+ */
+
+/*
+ * LACP current operational parameters structure.
+ */
+struct lacp_opreq {
+	uint16_t		actor_prio;
+	uint8_t			actor_mac[ETHER_ADDR_LEN];
+	uint16_t		actor_key;
+	uint16_t		actor_portprio;
+	uint16_t		actor_portno;
+	uint8_t			actor_state;
+	uint16_t		partner_prio;
+	uint8_t			partner_mac[ETHER_ADDR_LEN];
+	uint16_t		partner_key;
+	uint16_t		partner_portprio;
+	uint16_t		partner_portno;
+	uint8_t			partner_state;
+};
+
+/* lagg port settings */
+struct lagg_reqport {
+	char			rp_ifname[IFNAMSIZ];	/* name of the lagg */
+	char			rp_portname[IFNAMSIZ];	/* name of the port */
+	u_int32_t		rp_prio;		/* port priority */
+	u_int32_t		rp_flags;		/* port flags */
+	union {
+		struct lacp_opreq rpsc_lacp;
+	} rp_psc;
+#define rp_lacpreq	rp_psc.rpsc_lacp
+};
+
+#define	SIOCGLAGGPORT		_IOWR('i', 140, struct lagg_reqport)
+#define	SIOCSLAGGPORT		 _IOW('i', 141, struct lagg_reqport)
+#define	SIOCSLAGGDELPORT	 _IOW('i', 142, struct lagg_reqport)
+
+/* lagg, ports and options */
+struct lagg_reqall {
+	char			ra_ifname[IFNAMSIZ];	/* name of the lagg */
+	u_int			ra_proto;		/* lagg protocol */
+
+	size_t			ra_size;		/* size of buffer */
+	struct lagg_reqport	*ra_port;		/* allocated buffer */
+	int			ra_ports;		/* total port count */
+	union {
+		struct lacp_opreq rpsc_lacp;
+	} ra_psc;
+#define ra_lacpreq	ra_psc.rpsc_lacp
+};
+
+#define	SIOCGLAGG		_IOWR('i', 143, struct lagg_reqall)
+#define	SIOCSLAGG		 _IOW('i', 144, struct lagg_reqall)
+
+#ifdef _KERNEL
+/*
+ * Internal kernel part
+ */
+
+#define	lp_ifname		lp_ifp->if_xname	/* interface name */
+#define	lp_link_state		lp_ifp->if_link_state	/* link state */
+
+#define	LAGG_PORTACTIVE(_tp)	(					\
+	((_tp)->lp_link_state == LINK_STATE_UP) &&			\
+	((_tp)->lp_ifp->if_flags & IFF_UP)				\
+)
+
+struct lagg_ifreq {
+	union {
+		struct ifreq ifreq;
+		struct {
+			char ifr_name[IFNAMSIZ];
+			struct sockaddr_storage ifr_ss;
+		} ifreq_storage;
+	} ifreq;
+};
+
+#define	sc_ifflags		sc_ifp->if_flags		/* flags */
+#define	sc_ifname		sc_ifp->if_xname		/* name */
+#define	sc_capabilities		sc_ifp->if_capabilities	/* capabilities */
+
+#define	IFCAP_LAGG_MASK		0xffff0000	/* private capabilities */
+#define	IFCAP_LAGG_FULLDUPLEX	0x00010000	/* full duplex with >1 ports */
+
+/* Private data used by the loadbalancing protocol */
+struct lagg_lb {
+	u_int32_t		lb_key;
+	struct lagg_port	*lb_ports[LAGG_MAX_PORTS];
+};
+
+struct lagg_mc {
+	struct ifmultiaddr      *mc_ifma;
+	SLIST_ENTRY(lagg_mc)	mc_entries;
+};
+
+/* List of interfaces to have the MAC address modified */
+struct lagg_llq {
+	struct ifnet		*llq_ifp;
+	uint8_t			llq_lladdr[ETHER_ADDR_LEN];
+	SLIST_ENTRY(lagg_llq)	llq_entries;
+};
+
+struct lagg_softc {
+	struct ifnet			*sc_ifp;	/* virtual interface */
+	struct rwlock			sc_mtx;
+	int				sc_proto;	/* lagg protocol */
+	u_int				sc_count;	/* number of ports */
+	struct lagg_port		*sc_primary;	/* primary port */
+	struct ifmedia			sc_media;	/* media config */
+	caddr_t				sc_psc;		/* protocol data */
+	uint32_t			sc_seq;		/* sequence counter */
+
+	SLIST_HEAD(__tplhd, lagg_port)	sc_ports;	/* list of interfaces */
+	SLIST_ENTRY(lagg_softc)	sc_entries;
+
+	struct task			sc_lladdr_task;
+	SLIST_HEAD(__llqhd, lagg_llq)	sc_llq_head;	/* interfaces to program
+							   the lladdr on */
+
+	/* lagg protocol callbacks */
+	int	(*sc_detach)(struct lagg_softc *);
+	int	(*sc_start)(struct lagg_softc *, struct mbuf *);
+	struct mbuf *(*sc_input)(struct lagg_softc *, struct lagg_port *,
+		    struct mbuf *);
+	int	(*sc_port_create)(struct lagg_port *);
+	void	(*sc_port_destroy)(struct lagg_port *);
+	void	(*sc_linkstate)(struct lagg_port *);
+	void	(*sc_init)(struct lagg_softc *);
+	void	(*sc_stop)(struct lagg_softc *);
+	void	(*sc_lladdr)(struct lagg_softc *);
+	void	(*sc_req)(struct lagg_softc *, caddr_t);
+	void	(*sc_portreq)(struct lagg_port *, caddr_t);
+};
+
+struct lagg_port {
+	struct ifnet			*lp_ifp;	/* physical interface */
+	struct lagg_softc		*lp_softc;	/* parent lagg */
+	uint8_t				lp_lladdr[ETHER_ADDR_LEN];
+
+	u_char				lp_iftype;	/* interface type */
+	uint32_t			lp_prio;	/* port priority */
+	uint32_t			lp_flags;	/* port flags */
+	int				lp_ifflags;	/* saved ifp flags */
+	void				*lh_cookie;	/* if state hook */
+	caddr_t				lp_psc;		/* protocol data */
+	int				lp_detaching;	/* ifnet is detaching */
+
+	SLIST_HEAD(__mclhd, lagg_mc)	lp_mc_head;	/* multicast addresses */
+
+	/* Redirected callbacks */
+	int	(*lp_ioctl)(struct ifnet *, u_long, caddr_t);
+	int	(*lp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+		     struct rtentry *);
+
+	SLIST_ENTRY(lagg_port)		lp_entries;
+};
+
+#define	LAGG_LOCK_INIT(_sc)	rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
+#define	LAGG_LOCK_DESTROY(_sc)	rw_destroy(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK(_sc)		rw_rlock(&(_sc)->sc_mtx)
+#define	LAGG_WLOCK(_sc)		rw_wlock(&(_sc)->sc_mtx)
+#define	LAGG_RUNLOCK(_sc)	rw_runlock(&(_sc)->sc_mtx)
+#define	LAGG_WUNLOCK(_sc)	rw_wunlock(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define	LAGG_WLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+
+extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+extern void	(*lagg_linkstate_p)(struct ifnet *, int );
+
+int		lagg_enqueue(struct ifnet *, struct mbuf *);
+uint32_t	lagg_hashmbuf(struct mbuf *, uint32_t);
+
+#endif /* _KERNEL */
+
+#endif /* _NET_LAGG_H */
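
The LAGG_PROTOS table above pairs protocol names with LAGG_PROTO_* values so
that both the kernel and ifconfig(8) can translate between them.  A small
userland-style sketch of that lookup (assumed usage, not code from this
commit):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/ethernet.h>
	#include <net/if_lagg.h>
	#include <string.h>

	static const struct lagg_protos lagg_prtable[] = LAGG_PROTOS;

	static int
	lagg_proto_by_name(const char *name)
	{
		unsigned int i;

		for (i = 0; i < sizeof(lagg_prtable) / sizeof(lagg_prtable[0]); i++)
			if (strcmp(lagg_prtable[i].lpr_name, name) == 0)
				return (lagg_prtable[i].lpr_proto);
		return (-1);		/* unknown protocol name */
	}

For example, lagg_proto_by_name("lacp") yields LAGG_PROTO_LACP, and "default"
maps to LAGG_PROTO_FAILOVER via LAGG_PROTO_DEFAULT.
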
Index: if_tap.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_tap.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/net/if_tap.c -L sys/net/if_tap.c -u -r1.1.1.2 -r1.2
--- sys/net/if_tap.c
+++ sys/net/if_tap.c
@@ -31,10 +31,11 @@
  */
 
 /*
- * $FreeBSD: src/sys/net/if_tap.c,v 1.55.2.2 2005/08/25 05:01:20 rwatson Exp $
+ * $FreeBSD: src/sys/net/if_tap.c,v 1.71 2007/03/19 18:17:31 bms Exp $
  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
  */
 
+#include "opt_compat.h"
 #include "opt_inet.h"
 
 #include <sys/param.h>
@@ -46,6 +47,7 @@
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/poll.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/selinfo.h>
 #include <sys/signalvar.h>
@@ -60,7 +62,8 @@
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
-#include <net/if_arp.h>
+#include <net/if_clone.h>
+#include <net/if_dl.h>
 #include <net/route.h>
 #include <net/if_types.h>
 
@@ -91,6 +94,14 @@
 static int		tapifioctl(struct ifnet *, u_long, caddr_t);
 static void		tapifinit(void *);
 
+static int		tap_clone_create(struct if_clone *, int, caddr_t);
+static void		tap_clone_destroy(struct ifnet *);
+static int		vmnet_clone_create(struct if_clone *, int, caddr_t);
+static void		vmnet_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(tap, 0);
+IFC_SIMPLE_DECLARE(vmnet, 0);
+
 /* character device */
 static d_open_t		tapopen;
 static d_close_t	tapclose;
@@ -98,6 +109,26 @@
 static d_write_t	tapwrite;
 static d_ioctl_t	tapioctl;
 static d_poll_t		tappoll;
+static d_kqfilter_t	tapkqfilter;
+
+/* kqueue(2) */
+static int		tapkqread(struct knote *, long);
+static int		tapkqwrite(struct knote *, long);
+static void		tapkqdetach(struct knote *);
+
+static struct filterops	tap_read_filterops = {
+	.f_isfd =	1,
+	.f_attach =	NULL,
+	.f_detach =	tapkqdetach,
+	.f_event =	tapkqread,
+};
+
+static struct filterops	tap_write_filterops = {
+	.f_isfd =	1,
+	.f_attach =	NULL,
+	.f_detach =	tapkqdetach,
+	.f_event =	tapkqwrite,
+};
 
 static struct cdevsw	tap_cdevsw = {
 	.d_version =	D_VERSION,
@@ -109,6 +140,7 @@
 	.d_ioctl =	tapioctl,
 	.d_poll =	tappoll,
 	.d_name =	CDEV_NAME,
+	.d_kqfilter =	tapkqfilter,
 };
 
 /*
@@ -118,7 +150,9 @@
  */
 static struct mtx		tapmtx;
 static int			tapdebug = 0;        /* debug flag   */
-static int			tapuopen = 0;        /* allow user open() */	     
+static int			tapuopen = 0;        /* allow user open() */
+static int			tapuponopen = 0;    /* IFF_UP on open() */
+static int			tapdclone = 1;	/* enable devfs cloning */
 static SLIST_HEAD(, tap_softc)	taphead;             /* first device */
 static struct clonedevs 	*tapclones;
 
@@ -131,25 +165,100 @@
     "Ethernet tunnel software network interface");
 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
 	"Allow user to open /dev/tap (based on node permissions)");
+SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
+	"Bring interface up when /dev/tap is opened");
+SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0,
+	"Enable legacy devfs interface creation");
 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
 
+TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone);
+
 DEV_MODULE(if_tap, tapmodevent, NULL);
 
+static int
+tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct cdev *dev;
+	int i;
+	int extra;
+
+	if (strcmp(ifc->ifc_name, VMNET) == 0)
+		extra = VMNET_DEV_MASK;
+	else
+		extra = 0;
+
+	/* find any existing device, or allocate new unit number */
+	i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra);
+	if (i) {
+		dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
+		     UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit);
+		if (dev != NULL) {
+			dev_ref(dev);
+			dev->si_flags |= SI_CHEAPCLONE;
+		}
+	}
+
+	tapcreate(dev);
+	return (0);
+}
+
+/* vmnet devices are tap devices in disguise */
+static int
+vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	return tap_clone_create(ifc, unit, params);
+}
+
+static void
+tap_destroy(struct tap_softc *tp)
+{
+	struct ifnet *ifp = tp->tap_ifp;
+	int s;
+
+	/* Unlocked read. */
+	KASSERT(!(tp->tap_flags & TAP_OPEN),
+		("%s flags is out of sync", ifp->if_xname));
+
+	knlist_destroy(&tp->tap_rsel.si_note);
+	destroy_dev(tp->tap_dev);
+	s = splimp();
+	ether_ifdetach(ifp);
+	if_free_type(ifp, IFT_ETHER);
+	splx(s);
+
+	mtx_destroy(&tp->tap_mtx);
+	free(tp, M_TAP);
+}
+
+static void
+tap_clone_destroy(struct ifnet *ifp)
+{
+	struct tap_softc *tp = ifp->if_softc;
+
+	mtx_lock(&tapmtx);
+	SLIST_REMOVE(&taphead, tp, tap_softc, tap_next);
+	mtx_unlock(&tapmtx);
+	tap_destroy(tp);
+}
+
+/* vmnet devices are tap devices in disguise */
+static void
+vmnet_clone_destroy(struct ifnet *ifp)
+{
+	tap_clone_destroy(ifp);
+}
+
 /*
  * tapmodevent
  *
  * module event handler
  */
 static int
-tapmodevent(mod, type, data)
-	module_t	 mod;
-	int		 type;
-	void		*data;
+tapmodevent(module_t mod, int type, void *data)
 {
 	static eventhandler_tag	 eh_tag = NULL;
 	struct tap_softc	*tp = NULL;
 	struct ifnet		*ifp = NULL;
-	int			 s;
 
 	switch (type) {
 	case MOD_LOAD:
@@ -166,6 +275,8 @@
 			mtx_destroy(&tapmtx);
 			return (ENOMEM);
 		}
+		if_clone_attach(&tap_cloner);
+		if_clone_attach(&vmnet_cloner);
 		return (0);
 
 	case MOD_UNLOAD:
@@ -187,6 +298,8 @@
 		mtx_unlock(&tapmtx);
 
 		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
+		if_clone_detach(&tap_cloner);
+		if_clone_detach(&vmnet_cloner);
 
 		mtx_lock(&tapmtx);
 		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
@@ -197,18 +310,7 @@
 
 			TAPDEBUG("detaching %s\n", ifp->if_xname);
 
-			/* Unlocked read. */
-			KASSERT(!(tp->tap_flags & TAP_OPEN), 
-				("%s flags is out of sync", ifp->if_xname));
-
-			destroy_dev(tp->tap_dev);
-			s = splimp();
-			ether_ifdetach(ifp);
-			if_free_type(ifp, IFT_ETHER);
-			splx(s);
-
-			mtx_destroy(&tp->tap_mtx);
-			free(tp, M_TAP);
+			tap_destroy(tp);
 			mtx_lock(&tapmtx);
 		}
 		mtx_unlock(&tapmtx);
@@ -232,45 +334,62 @@
  * We need to support two kind of devices - tap and vmnet
  */
 static void
-tapclone(arg, cred, name, namelen, dev)
-	void	*arg;
-	struct ucred *cred;
-	char	*name;
-	int	 namelen;
-	struct cdev **dev;
-{
-	u_int		extra;
-	int		i, unit;
-	char		*device_name = name;
+tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
+{
+	char		devname[SPECNAMELEN + 1];
+	int		i, unit, append_unit;
+	int		extra;
 
 	if (*dev != NULL)
 		return;
 
-	device_name = TAP;
+	if (!tapdclone ||
+	    (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0))
+		return;
+
+	unit = 0;
+	append_unit = 0;
 	extra = 0;
+
+	/* We're interested in only tap/vmnet devices. */
 	if (strcmp(name, TAP) == 0) {
 		unit = -1;
 	} else if (strcmp(name, VMNET) == 0) {
-		device_name = VMNET;
-		extra = VMNET_DEV_MASK;
 		unit = -1;
-	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
-		device_name = VMNET;
 		extra = VMNET_DEV_MASK;
-		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
+	} else if (dev_stdclone(name, NULL, TAP, &unit) != 1) {
+		if (dev_stdclone(name, NULL, VMNET, &unit) != 1) {
 			return;
+		} else {
+			extra = VMNET_DEV_MASK;
+		}
 	}
 
+	if (unit == -1)
+		append_unit = 1;
+
 	/* find any existing device, or allocate new unit number */
 	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
 	if (i) {
+		if (append_unit) {
+			/*
+			 * We were passed 'tap' or 'vmnet', with no unit specified
+			 * so we'll need to append it now.
+			 */
+			namelen = snprintf(devname, sizeof(devname), "%s%d", name,
+			    unit);
+			name = devname;
+		}
+
 		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
-		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
+		     UID_ROOT, GID_WHEEL, 0600, "%s", name);
 		if (*dev != NULL) {
 			dev_ref(*dev);
 			(*dev)->si_flags |= SI_CHEAPCLONE;
 		}
 	}
+
+	if_clone_create(name, namelen, NULL);
 } /* tapclone */
 
 
@@ -280,8 +399,7 @@
  * to create interface
  */
 static void
-tapcreate(dev)
-	struct cdev *dev;
+tapcreate(struct cdev *dev)
 {
 	struct ifnet		*ifp = NULL;
 	struct tap_softc	*tp = NULL;
@@ -342,6 +460,8 @@
 	tp->tap_flags |= TAP_INITED;
 	mtx_unlock(&tp->tap_mtx);
 
+	knlist_init(&tp->tap_rsel.si_note, NULL, NULL, NULL, NULL);
+
 	TAPDEBUG("interface %s is created. minor = %#x\n", 
 		ifp->if_xname, minor(dev));
 } /* tapcreate */
@@ -353,32 +473,22 @@
  * to open tunnel. must be superuser
  */
 static int
-tapopen(dev, flag, mode, td)
-	struct cdev *dev;
-	int		 flag;
-	int		 mode;
-	struct thread	*td;
+tapopen(struct cdev *dev, int flag, int mode, struct thread *td)
 {
 	struct tap_softc	*tp = NULL;
 	struct ifnet		*ifp = NULL;
-	int			 s;
+	int			 error, s;
 
-	if (tapuopen == 0 && suser(td) != 0)
-		return (EPERM);
+	if (tapuopen == 0) {
+		error = priv_check(td, PRIV_NET_TAP);
+		if (error)
+			return (error);
+	}
 
 	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
 		return (ENXIO);
 
-	/*
-	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
-	 * by Giant, but the race actually exists under memory pressure as
-	 * well even when running with Giant, as malloc() may sleep.
-	 */
 	tp = dev->si_drv1;
-	if (tp == NULL) {
-		tapcreate(dev);
-		tp = dev->si_drv1;
-	}
 
 	mtx_lock(&tp->tap_mtx);
 	if (tp->tap_flags & TAP_OPEN) {
@@ -386,7 +496,7 @@
 		return (EBUSY);
 	}
 
-	bcopy(IFP2ENADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
+	bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
 	tp->tap_pid = td->td_proc->p_pid;
 	tp->tap_flags |= TAP_OPEN;
 	ifp = tp->tap_ifp;
@@ -395,6 +505,8 @@
 	s = splimp();
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+	if (tapuponopen)
+		ifp->if_flags |= IFF_UP;
 	splx(s);
 
 	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));
@@ -409,13 +521,9 @@
  * close the device - mark i/f down & delete routing info
  */
 static int
-tapclose(dev, foo, bar, td)
-	struct cdev *dev;
-	int		 foo;
-	int		 bar;
-	struct thread	*td;
+tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
 {
-	struct ifaddr *ifa;
+	struct ifaddr		*ifa;
 	struct tap_softc	*tp = dev->si_drv1;
 	struct ifnet		*ifp = tp->tap_ifp;
 	int			s;
@@ -446,6 +554,7 @@
 
 	funsetown(&tp->tap_sigio);
 	selwakeuppri(&tp->tap_rsel, PZERO+1);
+	KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
 
 	mtx_lock(&tp->tap_mtx);
 	tp->tap_flags &= ~TAP_OPEN;
@@ -465,8 +574,7 @@
  * network interface initialization function
  */
 static void
-tapifinit(xtp)
-	void	*xtp;
+tapifinit(void *xtp)
 {
 	struct tap_softc	*tp = (struct tap_softc *)xtp;
 	struct ifnet		*ifp = tp->tap_ifp;
@@ -487,12 +595,9 @@
  * Process an ioctl request on network interface
  */
 static int
-tapifioctl(ifp, cmd, data)
-	struct ifnet	*ifp;
-	u_long		 cmd;
-	caddr_t		 data;
+tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-	struct tap_softc	*tp = (struct tap_softc *)(ifp->if_softc);
+	struct tap_softc	*tp = ifp->if_softc;
 	struct ifstat		*ifs = NULL;
 	int			 s, dummy;
 
@@ -520,6 +625,7 @@
 			dummy = ether_ioctl(ifp, cmd, data);
 			splx(s);
 			return (dummy);
+			/* NOT REACHED */
 	}
 
 	return (0);
@@ -532,8 +638,7 @@
  * queue packets from higher level ready to put out
  */
 static void
-tapifstart(ifp)
-	struct ifnet	*ifp;
+tapifstart(struct ifnet *ifp)
 {
 	struct tap_softc	*tp = ifp->if_softc;
 	int			 s;
@@ -586,6 +691,7 @@
 			mtx_unlock(&tp->tap_mtx);
 
 		selwakeuppri(&tp->tap_rsel, PZERO+1);
+		KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
 		ifp->if_opackets ++; /* obytes are counted in ether_output */
 	}
 
@@ -600,18 +706,17 @@
  * the cdevsw interface is now pretty minimal
  */
 static int
-tapioctl(dev, cmd, data, flag, td)
-	struct cdev *dev;
-	u_long		 cmd;
-	caddr_t		 data;
-	int		 flag;
-	struct thread	*td;
+tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
 {
 	struct tap_softc	*tp = dev->si_drv1;
 	struct ifnet		*ifp = tp->tap_ifp;
 	struct tapinfo		*tapp = NULL;
 	int			 s;
 	int			 f;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+    defined(COMPAT_FREEBSD4)
+	int			 ival;
+#endif
 
 	switch (cmd) {
 		case TAPSIFINFO:
@@ -686,6 +791,13 @@
 			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
 			break;
 
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+    defined(COMPAT_FREEBSD4)
+		case _IO('V', 0):
+			ival = IOCPARM_IVAL(data);
+			data = (caddr_t)&ival;
+			/* FALLTHROUGH */
+#endif
 		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
 			f = *(int *)data;
 			f &= 0x0fff;
@@ -724,10 +836,7 @@
  * least as much of a packet as can be read
  */
 static int
-tapread(dev, uio, flag)
-	struct cdev *dev;
-	struct uio	*uio;
-	int		 flag;
+tapread(struct cdev *dev, struct uio *uio, int flag)
 {
 	struct tap_softc	*tp = dev->si_drv1;
 	struct ifnet		*ifp = tp->tap_ifp;
@@ -798,15 +907,12 @@
  * the cdevsw write interface - an atomic write is a packet - or else!
  */
 static int
-tapwrite(dev, uio, flag)
-	struct cdev *dev;
-	struct uio	*uio;
-	int		 flag;
+tapwrite(struct cdev *dev, struct uio *uio, int flag)
 {
+	struct ether_header	*eh;
 	struct tap_softc	*tp = dev->si_drv1;
 	struct ifnet		*ifp = tp->tap_ifp;
 	struct mbuf		*m;
-	int			 error = 0;
 
 	TAPDEBUG("%s writing, minor = %#x\n", 
 		ifp->if_xname, minor(dev));
@@ -821,13 +927,31 @@
 		return (EIO);
 	}
 
-	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN)) == NULL) {
+	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN,
+	    M_PKTHDR)) == NULL) {
 		ifp->if_ierrors ++;
-		return (error);
+		return (ENOBUFS);
 	}
 
 	m->m_pkthdr.rcvif = ifp;
 
+	/*
+	 * Only pass a unicast frame to ether_input(), if it would actually
+	 * have been received by non-virtual hardware.
+	 */
+	if (m->m_len < sizeof(struct ether_header)) {
+		m_freem(m);
+		return (0);
+	}
+	eh = mtod(m, struct ether_header *);
+
+	if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
+	    !ETHER_IS_MULTICAST(eh->ether_dhost) &&
+	    bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
+		m_freem(m);
+		return (0);
+	}
+
 	/* Pass packet up to parent. */
 	(*ifp->if_input)(ifp, m);
 	ifp->if_ipackets ++; /* ibytes are counted in parent */
@@ -844,10 +968,7 @@
  * anyway, it either accepts the packet or drops it
  */
 static int
-tappoll(dev, events, td)
-	struct cdev *dev;
-	int		 events;
-	struct thread	*td;
+tappoll(struct cdev *dev, int events, struct thread *td)
 {
 	struct tap_softc	*tp = dev->si_drv1;
 	struct ifnet		*ifp = tp->tap_ifp;
@@ -878,3 +999,104 @@
 	splx(s);
 	return (revents);
 } /* tappoll */
+
+
+/*
+ * tap_kqfilter
+ *
+ * support for kevent() system call
+ */
+static int
+tapkqfilter(struct cdev *dev, struct knote *kn)
+{
+    	int			 s;
+	struct tap_softc	*tp = dev->si_drv1;
+	struct ifnet		*ifp = tp->tap_ifp;
+
+	s = splimp();
+	switch (kn->kn_filter) {
+	case EVFILT_READ:
+		TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
+			ifp->if_xname, minor(dev));
+		kn->kn_fop = &tap_read_filterops;
+		break;
+
+	case EVFILT_WRITE:
+		TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
+			ifp->if_xname, minor(dev));
+		kn->kn_fop = &tap_write_filterops;
+		break;
+
+	default:
+		TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
+			ifp->if_xname, minor(dev));
+		splx(s);
+		return (EINVAL);
+		/* NOT REACHED */
+	}
+	splx(s);
+
+	kn->kn_hook = (caddr_t) dev;
+	knlist_add(&tp->tap_rsel.si_note, kn, 0);
+
+	return (0);
+} /* tapkqfilter */
+
+
+/*
+ * tap_kqread
+ * 
+ * Return true if there is data in the interface queue
+ */
+static int
+tapkqread(struct knote *kn, long hint)
+{
+	int			 ret, s;
+	struct cdev		*dev = (struct cdev *)(kn->kn_hook);
+	struct tap_softc	*tp = dev->si_drv1;
+	struct ifnet		*ifp = tp->tap_ifp;
+
+	s = splimp();
+	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
+		TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
+			ifp->if_xname, ifp->if_snd.ifq_len, minor(dev));
+		ret = 1;
+	} else {
+		TAPDEBUG("%s waiting for data, minor = %#x\n",
+			ifp->if_xname, minor(dev));
+		ret = 0;
+	}
+	splx(s);
+
+	return (ret);
+} /* tapkqread */
+
+
+/*
+ * tap_kqwrite
+ *
+ * Can always write; return the MTU in kn->kn_data
+ */
+static int
+tapkqwrite(struct knote *kn, long hint)
+{
+	int			 s;
+	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
+	struct ifnet		*ifp = tp->tap_ifp;
+
+	s = splimp();
+	kn->kn_data = ifp->if_mtu;
+	splx(s);
+
+	return (1);
+} /* tapkqwrite */
+
+
+static void
+tapkqdetach(struct knote *kn)
+{
+	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
+
+	knlist_remove(&tp->tap_rsel.si_note, kn, 0);
+} /* tapkqdetach */
+
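
The new d_kqfilter hook lets consumers wait for tap traffic with kevent(2)
instead of poll(2); tapkqread() reports the number of queued packets in
kn_data.  A userland sketch of that usage (the device name and the minimal
error handling are illustrative, not part of this commit):

	#include <sys/types.h>
	#include <sys/event.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		struct kevent ev;
		int fd, kq;

		fd = open("/dev/tap0", O_RDWR);
		kq = kqueue();
		if (fd < 0 || kq < 0)
			return (1);
		EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
		kevent(kq, &ev, 1, NULL, 0, NULL);		/* register */
		if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)	/* wait */
			printf("%jd packet(s) ready on tap0\n", (intmax_t)ev.data);
		close(kq);
		close(fd);
		return (0);
	}
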
Index: if_media.h
===================================================================
RCS file: /home/cvs/src/sys/net/if_media.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/net/if_media.h -L sys/net/if_media.h -u -r1.3 -r1.4
--- sys/net/if_media.h
+++ sys/net/if_media.h
@@ -1,5 +1,5 @@
 /*	$NetBSD: if_media.h,v 1.3 1997/03/26 01:19:27 thorpej Exp $	*/
-/* $FreeBSD: /repoman/r/ncvs/src/sys/net/if_media.h,v 1.30.2.1 2006/03/17 20:17:43 glebius Exp $ */
+/* $FreeBSD: src/sys/net/if_media.h,v 1.40 2007/09/18 20:30:40 sam Exp $ */
 
 /*-
  * Copyright (c) 1997
@@ -143,6 +143,8 @@
 #define	IFM_HPNA_1	17		/* HomePNA 1.0 (1Mb/s) */
 #define	IFM_10G_LR	18		/* 10GBase-LR 1310nm Single-mode */
 #define	IFM_10G_SR	19		/* 10GBase-SR 850nm Multi-mode */
+#define	IFM_10G_CX4	20		/* 10GBase CX4 copper */
+#define IFM_2500_SX	21		/* 2500BaseSX - multi-mode fiber */
 
 /* note 31 is the max! */
 
@@ -197,6 +199,11 @@
 #define	IFM_IEEE80211_OFDM72	18	/* OFDM 72Mbps */
 #define	IFM_IEEE80211_DS354k	19	/* Direct Sequence 354Kbps */
 #define	IFM_IEEE80211_DS512k	20	/* Direct Sequence 512Kbps */
+#define	IFM_IEEE80211_OFDM3	21	/* OFDM 3Mbps */
+#define	IFM_IEEE80211_OFDM4	22	/* OFDM 4.5Mbps */
+#define	IFM_IEEE80211_OFDM27	23	/* OFDM 27Mbps */
+/* NB: not enough bits to express MCS fully */
+#define	IFM_IEEE80211_MCS	24	/* HT MCS rate */
 
 #define	IFM_IEEE80211_ADHOC	0x00000100	/* Operate in Adhoc mode */
 #define	IFM_IEEE80211_HOSTAP	0x00000200	/* Operate in Host AP mode */
@@ -210,6 +217,8 @@
 #define	IFM_IEEE80211_11B	0x00020000	/* Direct Sequence mode */
 #define	IFM_IEEE80211_11G	0x00030000	/* 2Ghz, CCK mode */
 #define	IFM_IEEE80211_FH	0x00040000	/* 2Ghz, GFSK mode */
+#define	IFM_IEEE80211_11NA	0x00050000	/* 5Ghz, HT mode */
+#define	IFM_IEEE80211_11NG	0x00060000	/* 2Ghz, HT mode */
 
 /*
  * ATM
@@ -270,12 +279,12 @@
 #define	IFM_ACTIVE	0x00000002	/* Interface attached to working net */
 
 /* Mask of "status valid" bits, for ifconfig(8). */
-#define IFM_STATUS_VALID        IFM_AVALID
- 	 
+#define	IFM_STATUS_VALID	IFM_AVALID
+
 /* List of "status valid" bits, for ifconfig(8). */
-#define IFM_STATUS_VALID_LIST {          \
-     IFM_AVALID,                         \
-     0                                   \
+#define IFM_STATUS_VALID_LIST {						\
+        IFM_AVALID,							\
+        0								\
 }
 
 /*
@@ -339,6 +348,8 @@
 	{ IFM_HPNA_1,	"homePNA" },					\
 	{ IFM_10G_LR,	"10Gbase-LR" },					\
 	{ IFM_10G_SR,	"10Gbase-SR" },					\
+	{ IFM_10G_CX4,	"10Gbase-CX4" },				\
+	{ IFM_2500_SX,	"2500BaseSX" },					\
 	{ 0, NULL },							\
 }
 
@@ -360,6 +371,7 @@
 	{ IFM_1000_CX,	"1000CX" },					\
 	{ IFM_1000_T,	"1000TX" },					\
 	{ IFM_1000_T,	"1000T" },					\
+	{ IFM_2500_SX,	"2500SX" },					\
 	{ 0, NULL },							\
 }
 
@@ -435,6 +447,9 @@
 	{ IFM_IEEE80211_OFDM72, "OFDM/72Mbps" },			\
 	{ IFM_IEEE80211_DS354k, "DS/354Kbps" },				\
 	{ IFM_IEEE80211_DS512k, "DS/512Kbps" },				\
+	{ IFM_IEEE80211_OFDM3, "OFDM/3Mbps" },				\
+	{ IFM_IEEE80211_OFDM4, "OFDM/4.5Mbps" },			\
+	{ IFM_IEEE80211_OFDM27, "OFDM/27Mbps" },			\
 	{ 0, NULL },							\
 }
 
@@ -470,6 +485,9 @@
 	{ IFM_IEEE80211_DS354k, "DirectSequence/354Kbps" },		\
 	{ IFM_IEEE80211_DS512k, "DS512K" },				\
 	{ IFM_IEEE80211_DS512k, "DirectSequence/512Kbps" },		\
+	{ IFM_IEEE80211_OFDM3, "OFDM3" },				\
+	{ IFM_IEEE80211_OFDM4, "OFDM4.5" },				\
+	{ IFM_IEEE80211_OFDM27, "OFDM27" },				\
 	{ 0, NULL },							\
 }
 
@@ -489,6 +507,8 @@
 	{ IFM_IEEE80211_11B, "11b" },					\
 	{ IFM_IEEE80211_11G, "11g" },					\
 	{ IFM_IEEE80211_FH, "fh" },					\
+	{ IFM_IEEE80211_11NA, "11na" },					\
+	{ IFM_IEEE80211_11NG, "11ng" },					\
 	{ 0, NULL },							\
 }
 
@@ -580,6 +600,9 @@
 	{ IFM_ETHER | IFM_1000_T,	IF_Mbps(1000) },		\
 	{ IFM_ETHER | IFM_HPNA_1,	IF_Mbps(1) },			\
 	{ IFM_ETHER | IFM_10G_LR,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_10G_SR,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_10G_CX4,	IF_Gbps(10ULL) },		\
+	{ IFM_ETHER | IFM_2500_SX,	IF_Mbps(2500ULL) },		\
 									\
 	{ IFM_TOKEN | IFM_TOK_STP4,	IF_Mbps(4) },			\
 	{ IFM_TOKEN | IFM_TOK_STP16,	IF_Mbps(16) },			\
@@ -614,30 +637,29 @@
  * Status descriptions for the various media types.
  */
 struct ifmedia_status_description {
-	int        ifms_type;
-	int        ifms_valid;
-	int        ifms_bit;
+	int	   ifms_type;
+	int	   ifms_valid;
+	int	   ifms_bit;
 	const char *ifms_string[2];
 };
- 	 
-#define IFM_STATUS_DESC(ifms, bit)                                      \
-     (ifms)->ifms_string[((ifms)->ifms_bit & (bit)) ? 1 : 0]
- 	 
-#define IFM_STATUS_DESCRIPTIONS {                                       \
-     { IFM_ETHER,            IFM_AVALID,     IFM_ACTIVE,             \
-        { "no carrier", "active" } },                               \
-     { IFM_FDDI,             IFM_AVALID,     IFM_ACTIVE,             \
-        { "no ring", "inserted" } },                                \
-     { IFM_TOKEN,            IFM_AVALID,     IFM_ACTIVE,             \
-        { "no ring", "inserted" } },                                \
-     { IFM_IEEE80211,        IFM_AVALID,     IFM_ACTIVE,             \
-        { "no network", "active" } },                               \
-     { IFM_ATM,              IFM_AVALID,     IFM_ACTIVE,             \
-        { "no network", "active" } },                               \
-     { IFM_CARP,             IFM_AVALID,     IFM_ACTIVE,             \
-        { "backup", "master" } },                                   \
-     { 0,                    0,              0,                      \
-        { NULL, NULL } }                                            \
-}
 
+#define IFM_STATUS_DESC(ifms, bit)					\
+	(ifms)->ifms_string[((ifms)->ifms_bit & (bit)) ? 1 : 0]
+
+#define IFM_STATUS_DESCRIPTIONS {					\
+	{ IFM_ETHER,		IFM_AVALID,	IFM_ACTIVE,		\
+	    { "no carrier", "active" } },				\
+	{ IFM_FDDI,		IFM_AVALID,	IFM_ACTIVE,		\
+	    { "no ring", "inserted" } },				\
+	{ IFM_TOKEN,		IFM_AVALID,	IFM_ACTIVE,		\
+	    { "no ring", "inserted" } },				\
+	{ IFM_IEEE80211,	IFM_AVALID,	IFM_ACTIVE,		\
+	    { "no network", "active" } },				\
+	{ IFM_ATM,		IFM_AVALID,	IFM_ACTIVE,		\
+	    { "no network", "active" } },				\
+	{ IFM_CARP,		IFM_AVALID,	IFM_ACTIVE,		\
+	    { "backup", "master" } },					\
+	{ 0,			0,		0,			\
+	    { NULL, NULL } }						\
+}
 #endif	/* _NET_IF_MEDIA_H_ */
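
The rework of IFM_STATUS_VALID_LIST and IFM_STATUS_DESCRIPTIONS above is
whitespace-only and doesn't change how the tables are consumed; a short
ifconfig(8)-style sketch of that use (assumed usage, not code from this
commit):

	#include <net/if_media.h>

	static const struct ifmedia_status_description ifms_table[] =
	    IFM_STATUS_DESCRIPTIONS;

	/* Map an (ifm_active, ifm_status) pair to "active", "no carrier", ... */
	static const char *
	media_status_string(int media, int status)
	{
		const struct ifmedia_status_description *d;

		for (d = ifms_table; d->ifms_valid != 0; d++) {
			if (IFM_TYPE(media) != d->ifms_type)
				continue;
			if ((status & d->ifms_valid) == 0)
				break;			/* status not reported */
			return (IFM_STATUS_DESC(d, status));
		}
		return ("unknown");
	}
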
Index: if_ppp.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_ppp.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/net/if_ppp.c -L sys/net/if_ppp.c -u -r1.2 -r1.3
--- sys/net/if_ppp.c
+++ sys/net/if_ppp.c
@@ -71,11 +71,12 @@
  * Paul Mackerras (paulus at cs.anu.edu.au).
  */
 
-/* $FreeBSD: src/sys/net/if_ppp.c,v 1.105.2.2 2005/08/25 05:01:20 rwatson Exp $ */
+/* $FreeBSD: src/sys/net/if_ppp.c,v 1.121 2007/07/02 15:44:30 rwatson Exp $ */
 /* from if_sl.c,v 1.11 84/10/04 12:54:47 rick Exp */
 /* from NetBSD: if_ppp.c,v 1.15.2.2 1994/07/28 05:17:58 cgd Exp */
 
 #include "opt_inet.h"
+#include "opt_inet6.h"
 #include "opt_ipx.h"
 #include "opt_mac.h"
 #include "opt_ppp.h"
@@ -87,8 +88,8 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
-#include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/filio.h>
@@ -104,14 +105,14 @@
 #include <net/netisr.h>
 #include <net/bpf.h>
 
-#if INET
+#ifdef INET
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #endif
 
-#if IPX
+#ifdef IPX
 #include <netipx/ipx.h>
 #include <netipx/ipx_if.h>
 #endif
@@ -123,6 +124,8 @@
 #include <net/if_ppp.h>
 #include <net/if_pppvar.h>
 
+#include <security/mac/mac_framework.h>
+
 /* minimise diffs */
 #ifndef splsoftnet
 #define splsoftnet	splnet
@@ -157,7 +160,7 @@
 static void	ppp_ccp_closed(struct ppp_softc *);
 static void	ppp_inproc(struct ppp_softc *, struct mbuf *);
 static void	pppdumpm(struct mbuf *m0);
-static int	ppp_clone_create(struct if_clone *, int);
+static int	ppp_clone_create(struct if_clone *, int, caddr_t);
 static void	ppp_clone_destroy(struct ifnet *);
 
 IFC_SIMPLE_DECLARE(ppp, 0);
@@ -193,10 +196,10 @@
 extern struct compressor ppp_deflate, ppp_deflate_draft;
 
 static struct compressor *ppp_compressors[8] = {
-#if DO_BSD_COMPRESS && defined(PPP_BSDCOMP)
+#if defined(PPP_BSDCOMP)
     &ppp_bsd_compress,
 #endif
-#if DO_DEFLATE && defined(PPP_DEFLATE)
+#if defined(PPP_DEFLATE)
     &ppp_deflate,
     &ppp_deflate_draft,
 #endif
@@ -205,7 +208,7 @@
 #endif /* PPP_COMPRESS */
 
 static int
-ppp_clone_create(struct if_clone *ifc, int unit)
+ppp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct ifnet		*ifp;
 	struct ppp_softc	*sc;
@@ -217,6 +220,7 @@
 		return (ENOSPC);
 	}
 
+	callout_init(&sc->sc_timo_ch, 0);
 	ifp->if_softc = sc;
 	if_initname(ifp, ifc->ifc_name, unit);
 	ifp->if_mtu = PPP_MTU;
@@ -242,11 +246,15 @@
 }
 
 static void
-ppp_destroy(struct ppp_softc *sc)
+ppp_clone_destroy(struct ifnet *ifp)
 {
-	struct ifnet *ifp;
+	struct ppp_softc *sc;
+
+	sc = ifp->if_softc;
+	PPP_LIST_LOCK();
+	LIST_REMOVE(sc, sc_list);
+	PPP_LIST_UNLOCK();
 
-	ifp = PPP2IFP(sc);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
@@ -256,22 +264,9 @@
 	free(sc, M_PPP);
 }
 
-static void
-ppp_clone_destroy(struct ifnet *ifp)
-{
-	struct ppp_softc *sc;
-
-	sc = ifp->if_softc;
-	PPP_LIST_LOCK();
-	LIST_REMOVE(sc, sc_list);
-	PPP_LIST_UNLOCK();
-	ppp_destroy(sc);
-}
-
 static int
 ppp_modevent(module_t mod, int type, void *data) 
 {
-	struct ppp_softc *sc;
 
 	switch (type) { 
 	case MOD_LOAD: 
@@ -293,14 +288,6 @@
 		netisr_unregister(NETISR_PPP);
 
 		if_clone_detach(&ppp_cloner);
-
-		PPP_LIST_LOCK();
-		while ((sc = LIST_FIRST(&ppp_softc_list)) != NULL) {
-			LIST_REMOVE(sc, sc_list);
-			PPP_LIST_UNLOCK();
-			ppp_destroy(sc);
-			PPP_LIST_LOCK();
-		}
 		PPP_LIST_LOCK_DESTROY();
 		break; 
 	default:
@@ -345,7 +332,7 @@
     /* Try to clone an interface if we don't have a free one */
     if (sc == NULL) {
 	strcpy(tmpname, PPPNAME);
-	if (if_clone_create(tmpname, sizeof(tmpname)) != 0)
+	if (if_clone_create(tmpname, sizeof(tmpname), (caddr_t) 0) != 0)
 	    return NULL;
 	ifp = ifunit(tmpname);
 	if (ifp == NULL)
@@ -373,7 +360,7 @@
 	sc->sc_npmode[i] = NPMODE_ERROR;
     sc->sc_npqueue = NULL;
     sc->sc_npqtail = &sc->sc_npqueue;
-    sc->sc_last_sent = sc->sc_last_recv = time_second;
+    sc->sc_last_sent = sc->sc_last_recv = time_uptime;
 
     return sc;
 }
@@ -467,7 +454,8 @@
 	break;
 
     case PPPIOCSFLAGS:
-	if ((error = suser(td)) != 0)
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
 	    break;
 	flags = *(int *)data & SC_MASK;
 	s = splsoftnet();
@@ -481,8 +469,9 @@
 	break;
 
     case PPPIOCSMRU:
-	if ((error = suser(td)) != 0)
-	    return (error);
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
+		return (error);
 	mru = *(int *)data;
 	if (mru >= PPP_MRU && mru <= PPP_MAXMRU)
 	    sc->sc_mru = mru;
@@ -494,7 +483,8 @@
 
 #ifdef VJC
     case PPPIOCSMAXCID:
-	if ((error = suser(td)) != 0)
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
 	    break;
 	if (sc->sc_comp) {
 	    s = splsoftnet();
@@ -505,14 +495,16 @@
 #endif
 
     case PPPIOCXFERUNIT:
-	if ((error = suser(td)) != 0)
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
 	    break;
 	sc->sc_xfer = p->p_pid;
 	break;
 
 #ifdef PPP_COMPRESS
     case PPPIOCSCOMPRESS:
-	if ((error = suser(td)) != 0)
+	error = priv_check(td, PRIV_NET_PPP);
+	if (error)
 	    break;
 	odp = (struct ppp_option_data *) data;
 	nb = odp->length;
@@ -560,7 +552,7 @@
 		    sc->sc_flags &= ~SC_DECOMP_RUN;
 		    splx(s);
 		}
-		break;
+		return (error);
 	    }
 	if (sc->sc_flags & SC_DEBUG)
 	    if_printf(PPP2IFP(sc), "no compressor for [%x %x %x], %x\n",
@@ -577,6 +569,9 @@
 	case PPP_IP:
 	    npx = NP_IP;
 	    break;
+	case PPP_IPV6:
+	    npx = NP_IPV6;
+	    break;
 	default:
 	    error = EINVAL;
 	}
@@ -585,7 +580,8 @@
 	if (cmd == PPPIOCGNPMODE) {
 	    npi->mode = sc->sc_npmode[npx];
 	} else {
-	    if ((error = suser(td)) != 0)
+	    error = priv_check(td, PRIV_NET_PPP);
+	    if (error)
 		break;
 	    if (npi->mode != sc->sc_npmode[npx]) {
 		s = splsoftnet();
@@ -601,7 +597,7 @@
 
     case PPPIOCGIDLE:
 	s = splsoftnet();
-	t = time_second;
+	t = time_uptime;
 	((struct ppp_idle *)data)->xmit_idle = t - sc->sc_last_sent;
 	((struct ppp_idle *)data)->recv_idle = t - sc->sc_last_recv;
 	splx(s);
@@ -684,6 +680,10 @@
 	case AF_INET:
 	    break;
 #endif
+#ifdef INET6
+	case AF_INET6:
+	    break;
+#endif
 #ifdef IPX
 	case AF_IPX:
 	    break;
@@ -700,6 +700,10 @@
 	case AF_INET:
 	    break;
 #endif
+#ifdef INET6
+	case AF_INET6:
+	    break;
+#endif
 #ifdef IPX
 	case AF_IPX:
 	    break;
@@ -711,7 +715,12 @@
 	break;
 
     case SIOCSIFMTU:
-	if ((error = suser(td)) != 0)
+	/*
+	 * XXXRW: Isn't this priv_check() check redundant to the one at the
+	 * ifnet layer?
+	 */
+	error = priv_check(td, PRIV_NET_SETIFMTU);
+	if (error)
 	    break;
 	if (ifr->ifr_mtu > PPP_MAXMTU)
 	    error = EINVAL;
@@ -737,6 +746,10 @@
 	case AF_INET:
 	    break;
 #endif
+#ifdef INET6
+	case AF_INET6:
+	    break;
+#endif
 	default:
 	    error = EAFNOSUPPORT;
 	    break;
@@ -834,6 +847,24 @@
 	    m0->m_flags |= M_HIGHPRI;
 	break;
 #endif
+#ifdef INET6
+    case AF_INET6:
+	address = PPP_ALLSTATIONS;	/*XXX*/
+	control = PPP_UI;		/*XXX*/
+	protocol = PPP_IPV6;
+	mode = sc->sc_npmode[NP_IPV6];
+
+#if 0	/* XXX flowinfo/traffic class, maybe? */
+	/*
+	 * If this packet has the "low delay" bit set in the IP header,
+	 * put it on the fastq instead.
+	 */
+	ip = mtod(m0, struct ip *);
+	if (ip->ip_tos & IPTOS_LOWDELAY)
+	    m0->m_flags |= M_HIGHPRI;
+#endif
+	break;
+#endif
 #ifdef IPX
     case AF_IPX:
 	/*
@@ -918,14 +949,14 @@
 	 */
 	if (sc->sc_active_filt.bf_insns == 0
 	    || bpf_filter(sc->sc_active_filt.bf_insns, (u_char *) m0, len, 0))
-	    sc->sc_last_sent = time_second;
+	    sc->sc_last_sent = time_uptime;
 
 	*mtod(m0, u_char *) = address;
 #else
 	/*
 	 * Update the time we sent the most recent data packet.
 	 */
-	sc->sc_last_sent = time_second;
+	sc->sc_last_sent = time_uptime;
 #endif /* PPP_FILTER */
     }
 
@@ -990,6 +1021,9 @@
 	case PPP_IP:
 	    mode = sc->sc_npmode[NP_IP];
 	    break;
+	case PPP_IPV6:
+	    mode = sc->sc_npmode[NP_IPV6];
+	    break;
 	default:
 	    mode = NPMODE_PASS;
 	}
@@ -1557,14 +1591,14 @@
 	}
 	if (sc->sc_active_filt.bf_insns == 0
 	    || bpf_filter(sc->sc_active_filt.bf_insns, (u_char *) m, ilen, 0))
-	    sc->sc_last_recv = time_second;
+	    sc->sc_last_recv = time_uptime;
 
 	*mtod(m, u_char *) = adrs;
 #else
 	/*
 	 * Record the time that we received this packet.
 	 */
-	sc->sc_last_recv = time_second;
+	sc->sc_last_recv = time_uptime;
 #endif /* PPP_FILTER */
     }
 
@@ -1587,11 +1621,28 @@
 	m->m_pkthdr.len -= PPP_HDRLEN;
 	m->m_data += PPP_HDRLEN;
 	m->m_len -= PPP_HDRLEN;
-	if (ip_fastforward(m))
+	if ((m = ip_fastforward(m)) == NULL)
 	    return;
 	isr = NETISR_IP;
 	break;
 #endif
+#ifdef INET6
+    case PPP_IPV6:
+	/*
+	 * IPv6 packet - take off the ppp header and pass it up to IPv6.
+	 */
+	if ((ifp->if_flags & IFF_UP) == 0
+	    || sc->sc_npmode[NP_IPV6] != NPMODE_PASS) {
+	    /* interface is down - drop the packet. */
+	    m_freem(m);
+	    return;
+	}
+	m->m_pkthdr.len -= PPP_HDRLEN;
+	m->m_data += PPP_HDRLEN;
+	m->m_len -= PPP_HDRLEN;
+	isr = NETISR_IPV6;
+	break;
+#endif
 #ifdef IPX
     case PPP_IPX:
 	/*
@@ -1607,7 +1658,7 @@
 	m->m_data += PPP_HDRLEN;
 	m->m_len -= PPP_HDRLEN;
 	isr = NETISR_IPX;
-	sc->sc_last_recv = time_second;	/* update time of last pkt rcvd */
+	sc->sc_last_recv = time_uptime;	/* update time of last pkt rcvd */
 	break;
 #endif
 
Index: raw_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/net/raw_usrreq.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/net/raw_usrreq.c -L sys/net/raw_usrreq.c -u -r1.1.1.1 -r1.2
--- sys/net/raw_usrreq.c
+++ sys/net/raw_usrreq.c
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)raw_usrreq.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/raw_usrreq.c,v 1.37 2005/01/07 01:45:35 imp Exp $
+ * $FreeBSD: src/sys/net/raw_usrreq.c,v 1.44 2006/11/06 13:42:02 rwatson Exp $
  */
 
 #include <sys/param.h>
@@ -36,6 +36,7 @@
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
+#include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
@@ -138,19 +139,24 @@
 	/* INCOMPLETE */
 }
 
-static int
+static void
 raw_uabort(struct socket *so)
 {
 	struct rawcb *rp = sotorawcb(so);
 
-	if (rp == 0)
-		return EINVAL;
+	KASSERT(rp != NULL, ("raw_uabort: rp == NULL"));
+	raw_disconnect(rp);
+	soisdisconnected(so);
+}
+
+static void
+raw_uclose(struct socket *so)
+{
+	struct rawcb *rp = sotorawcb(so);
+
+	KASSERT(rp != NULL, ("raw_uclose: rp == NULL"));
 	raw_disconnect(rp);
 	soisdisconnected(so);
-	ACCEPT_LOCK();
-	SOCK_LOCK(so);
-	sotryfree(so);
-	return 0;
 }
 
 /* pru_accept is EOPNOTSUPP */
@@ -158,13 +164,19 @@
 static int
 raw_uattach(struct socket *so, int proto, struct thread *td)
 {
-	struct rawcb *rp = sotorawcb(so);
 	int error;
 
-	if (rp == 0)
-		return EINVAL;
-	if (td && (error = suser(td)) != 0)
-		return error;
+	/*
+	 * Implementors of raw sockets will already have allocated the PCB,
+	 * so it must be non-NULL here.
+	 */
+	KASSERT(sotorawcb(so) != NULL, ("raw_uattach: so_pcb == NULL"));
+
+	if (td != NULL) {
+		error = priv_check(td, PRIV_NET_RAW);
+		if (error)
+			return error;
+	}
 	return raw_attach(so, proto);
 }
 
@@ -183,16 +195,13 @@
 /* pru_connect2 is EOPNOTSUPP */
 /* pru_control is EOPNOTSUPP */
 
-static int
+static void
 raw_udetach(struct socket *so)
 {
 	struct rawcb *rp = sotorawcb(so);
 
-	if (rp == 0)
-		return EINVAL;
-
+	KASSERT(rp != NULL, ("raw_udetach: rp == NULL"));
 	raw_detach(rp);
-	return 0;
 }
 
 static int
@@ -200,8 +209,7 @@
 {
 	struct rawcb *rp = sotorawcb(so);
 
-	if (rp == 0)
-		return EINVAL;
+	KASSERT(rp != NULL, ("raw_udisconnect: rp == NULL"));
 	if (rp->rcb_faddr == 0) {
 		return ENOTCONN;
 	}
@@ -217,8 +225,7 @@
 {
 	struct rawcb *rp = sotorawcb(so);
 
-	if (rp == 0)
-		return EINVAL;
+	KASSERT(rp != NULL, ("raw_upeeraddr: rp == NULL"));
 	if (rp->rcb_faddr == 0) {
 		return ENOTCONN;
 	}
@@ -236,10 +243,7 @@
 	int error;
 	struct rawcb *rp = sotorawcb(so);
 
-	if (rp == 0) {
-		error = EINVAL;
-		goto release;
-	}
+	KASSERT(rp != NULL, ("raw_usend: rp == NULL"));
 
 	if (flags & PRUS_OOB) {
 		error = EOPNOTSUPP;
@@ -275,10 +279,8 @@
 static int
 raw_ushutdown(struct socket *so)
 {
-	struct rawcb *rp = sotorawcb(so);
 
-	if (rp == 0)
-		return EINVAL;
+	KASSERT(sotorawcb(so) != NULL, ("raw_ushutdown: rp == NULL"));
 	socantsendmore(so);
 	return 0;
 }
@@ -288,8 +290,7 @@
 {
 	struct rawcb *rp = sotorawcb(so);
 
-	if (rp == 0)
-		return EINVAL;
+	KASSERT(rp != NULL, ("raw_usockaddr: rp == NULL"));
 	if (rp->rcb_laddr == 0)
 		return EINVAL;
 	*nam = sodupsockaddr(rp->rcb_laddr, M_WAITOK);
@@ -307,4 +308,5 @@
 	.pru_send =		raw_usend,
 	.pru_shutdown =		raw_ushutdown,
 	.pru_sockaddr =		raw_usockaddr,
+	.pru_close =		raw_uclose,
 };

