[Midnightbsd-cvs] src [9938] trunk/sys/net: sync with freebsd
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Fri May 25 16:05:59 EDT 2018
Revision: 9938
http://svnweb.midnightbsd.org/src/?rev=9938
Author: laffer1
Date: 2018-05-25 16:05:59 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd
Modified Paths:
--------------
trunk/sys/net/bpf.c
trunk/sys/net/bpf.h
trunk/sys/net/bpf_buffer.c
trunk/sys/net/bpf_buffer.h
trunk/sys/net/bpf_filter.c
trunk/sys/net/bpf_jitter.c
trunk/sys/net/bpf_jitter.h
trunk/sys/net/bpf_zerocopy.c
trunk/sys/net/bpf_zerocopy.h
trunk/sys/net/bpfdesc.h
trunk/sys/net/bridgestp.c
trunk/sys/net/bridgestp.h
trunk/sys/net/ethernet.h
trunk/sys/net/fddi.h
trunk/sys/net/firewire.h
trunk/sys/net/flowtable.c
trunk/sys/net/flowtable.h
trunk/sys/net/ieee8023ad_lacp.c
trunk/sys/net/ieee8023ad_lacp.h
trunk/sys/net/if.c
trunk/sys/net/if.h
trunk/sys/net/if_arc.h
trunk/sys/net/if_arcsubr.c
trunk/sys/net/if_arp.h
trunk/sys/net/if_atm.h
trunk/sys/net/if_atmsubr.c
trunk/sys/net/if_bridge.c
trunk/sys/net/if_bridgevar.h
trunk/sys/net/if_clone.c
trunk/sys/net/if_clone.h
trunk/sys/net/if_dead.c
trunk/sys/net/if_debug.c
trunk/sys/net/if_disc.c
trunk/sys/net/if_dl.h
trunk/sys/net/if_edsc.c
trunk/sys/net/if_ef.c
trunk/sys/net/if_enc.c
trunk/sys/net/if_enc.h
trunk/sys/net/if_epair.c
trunk/sys/net/if_ethersubr.c
trunk/sys/net/if_faith.c
trunk/sys/net/if_fddisubr.c
trunk/sys/net/if_fwsubr.c
trunk/sys/net/if_gif.c
trunk/sys/net/if_gif.h
trunk/sys/net/if_gre.c
trunk/sys/net/if_gre.h
trunk/sys/net/if_iso88025subr.c
trunk/sys/net/if_lagg.c
trunk/sys/net/if_lagg.h
trunk/sys/net/if_llatbl.c
trunk/sys/net/if_llatbl.h
trunk/sys/net/if_llc.h
trunk/sys/net/if_loop.c
trunk/sys/net/if_media.c
trunk/sys/net/if_media.h
trunk/sys/net/if_mib.c
trunk/sys/net/if_mib.h
trunk/sys/net/if_sppp.h
trunk/sys/net/if_spppfr.c
trunk/sys/net/if_spppsubr.c
trunk/sys/net/if_stf.c
trunk/sys/net/if_stf.h
trunk/sys/net/if_tap.c
trunk/sys/net/if_tap.h
trunk/sys/net/if_tapvar.h
trunk/sys/net/if_tun.c
trunk/sys/net/if_tun.h
trunk/sys/net/if_types.h
trunk/sys/net/if_var.h
trunk/sys/net/if_vlan.c
trunk/sys/net/if_vlan_var.h
trunk/sys/net/iso88025.h
trunk/sys/net/netisr.c
trunk/sys/net/netisr.h
trunk/sys/net/netisr_internal.h
trunk/sys/net/netmap.h
trunk/sys/net/netmap_user.h
trunk/sys/net/pfil.c
trunk/sys/net/pfil.h
trunk/sys/net/pfkeyv2.h
trunk/sys/net/ppp_defs.h
trunk/sys/net/radix.c
trunk/sys/net/radix.h
trunk/sys/net/radix_mpath.c
trunk/sys/net/radix_mpath.h
trunk/sys/net/raw_cb.c
trunk/sys/net/raw_cb.h
trunk/sys/net/raw_usrreq.c
trunk/sys/net/route.c
trunk/sys/net/route.h
trunk/sys/net/rtsock.c
trunk/sys/net/slcompress.c
trunk/sys/net/slcompress.h
trunk/sys/net/vnet.c
trunk/sys/net/vnet.h
trunk/sys/net/zlib.c
trunk/sys/net/zlib.h
trunk/sys/net/zutil.h
Added Paths:
-----------
trunk/sys/net/if_me.c
trunk/sys/net/if_pflog.h
trunk/sys/net/if_pfsync.h
trunk/sys/net/if_vxlan.c
trunk/sys/net/if_vxlan.h
trunk/sys/net/mppc.h
trunk/sys/net/mppcc.c
trunk/sys/net/mppcd.c
trunk/sys/net/paravirt.h
trunk/sys/net/pfvar.h
trunk/sys/net/rndis.h
trunk/sys/net/sff8436.h
trunk/sys/net/sff8472.h
Modified: trunk/sys/net/bpf.c
===================================================================
--- trunk/sys/net/bpf.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf.c 315625 2017-03-20 08:16:05Z ae $");
#include "opt_bpf.h"
#include "opt_compat.h"
@@ -142,7 +142,7 @@
* structures registered by different layers in the stack (i.e., 802.11
* frames, ethernet frames, etc).
*/
-static LIST_HEAD(, bpf_if) bpf_iflist;
+static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
static struct mtx bpf_mtx; /* bpf global lock */
static int bpf_bpfd_cnt;
@@ -523,32 +523,15 @@
}
len = uio->uio_resid;
-
- if (len - hlen > ifp->if_mtu)
+ if (len < hlen || len - hlen > ifp->if_mtu)
return (EMSGSIZE);
- if ((unsigned)len > MJUM16BYTES)
+ m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
+ if (m == NULL)
return (EIO);
-
- if (len <= MHLEN)
- MGETHDR(m, M_WAIT, MT_DATA);
- else if (len <= MCLBYTES)
- m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
- else
- m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
-#if (MJUMPAGESIZE > MCLBYTES)
- len <= MJUMPAGESIZE ? MJUMPAGESIZE :
-#endif
- (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
m->m_pkthdr.len = m->m_len = len;
- m->m_pkthdr.rcvif = NULL;
*mp = m;
- if (m->m_len < hlen) {
- error = EPERM;
- goto bad;
- }
-
error = uiomove(mtod(m, u_char *), len, uio);
if (error)
goto bad;
@@ -618,13 +601,13 @@
* Save sysctl value to protect from sysctl change
* between reads
*/
- op_w = V_bpf_optimize_writers;
+ op_w = V_bpf_optimize_writers || d->bd_writer;
if (d->bd_bif != NULL)
bpf_detachd_locked(d);
/*
* Point d at bp, and add d to the interface's list.
- * Since there are many applicaiotns using BPF for
+ * Since there are many applications using BPF for
* sending raw packets only (dhcpd, cdpd are good examples)
* we can delay adding d to the list of active listeners until
* some filter is configured.
@@ -661,7 +644,7 @@
/*
* Add d to the list of active bp filters.
- * Reuqires bpf_attachd() to be called before
+ * Requires bpf_attachd() to be called before.
*/
static void
bpf_upgraded(struct bpf_d *d)
@@ -805,7 +788,7 @@
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
struct bpf_d *d;
- int error, size;
+ int error;
d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -820,6 +803,9 @@
* particular buffer method.
*/
bpf_buffer_init(d);
+ if ((flags & FREAD) == 0)
+ d->bd_writer = 2;
+ d->bd_hbuf_in_use = 0;
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
d->bd_direction = BPF_D_INOUT;
@@ -832,10 +818,6 @@
callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
- /* Allocate default buffers */
- size = d->bd_bufsize;
- bpf_buffer_ioctl_sblen(d, &size);
-
return (0);
}
@@ -873,6 +855,14 @@
callout_stop(&d->bd_callout);
timed_out = (d->bd_state == BPF_TIMED_OUT);
d->bd_state = BPF_IDLE;
+ while (d->bd_hbuf_in_use) {
+ error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET|PCATCH, "bd_hbuf", 0);
+ if (error != 0) {
+ BPFD_UNLOCK(d);
+ return (error);
+ }
+ }
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
@@ -941,6 +931,7 @@
/*
* At this point, we know we have something in the hold slot.
*/
+ d->bd_hbuf_in_use = 1;
BPFD_UNLOCK(d);
/*
@@ -947,18 +938,20 @@
* Move data from hold buffer into user space.
* We know the entire buffer is transferred since
* we checked above that the read buffer is bpf_bufsize bytes.
- *
- * XXXRW: More synchronization needed here: what if a second thread
- * issues a read on the same fd at the same time? Don't want this
- * getting invalidated.
+ *
+ * We do not have to worry about simultaneous reads because
+ * we waited for sole access to the hold buffer above.
*/
error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
BPFD_LOCK(d);
+ KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
d->bd_hlen = 0;
bpf_buf_reclaimed(d);
+ d->bd_hbuf_in_use = 0;
+ wakeup(&d->bd_hbuf_in_use);
BPFD_UNLOCK(d);
return (error);
@@ -1062,7 +1055,7 @@
dst.sa_family = pseudo_AF_HDRCMPLT;
if (d->bd_feedback) {
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc != NULL)
mc->m_pkthdr.rcvif = ifp;
/* Set M_PROMISC for outgoing packets to be discarded. */
@@ -1112,6 +1105,9 @@
BPFD_LOCK_ASSERT(d);
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
+ "bd_hbuf", 0);
if ((d->bd_hbuf != NULL) &&
(d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
/* Free the hold buffer. */
@@ -1195,7 +1191,7 @@
#endif
case BIOCGETIF:
case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCGRTIMEOUT32:
#endif
case BIOCGSTATS:
@@ -1207,7 +1203,7 @@
case FIONREAD:
case BIOCLOCK:
case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCSRTIMEOUT32:
#endif
case BIOCIMMEDIATE:
@@ -1252,6 +1248,9 @@
BPFD_LOCK(d);
n = d->bd_slen;
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET, "bd_hbuf", 0);
if (d->bd_hbuf)
n += d->bd_hlen;
BPFD_UNLOCK(d);
@@ -1409,21 +1408,44 @@
* Set interface.
*/
case BIOCSETIF:
- BPF_LOCK();
- error = bpf_setif(d, (struct ifreq *)addr);
- BPF_UNLOCK();
- break;
+ {
+ int alloc_buf, size;
+ /*
+ * Behavior here depends on the buffering model. If
+ * we're using kernel memory buffers, then we can
+ * allocate them here. If we're using zero-copy,
+ * then the user process must have registered buffers
+ * by the time we get here.
+ */
+ alloc_buf = 0;
+ BPFD_LOCK(d);
+ if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
+ d->bd_sbuf == NULL)
+ alloc_buf = 1;
+ BPFD_UNLOCK(d);
+ if (alloc_buf) {
+ size = d->bd_bufsize;
+ error = bpf_buffer_ioctl_sblen(d, &size);
+ if (error != 0)
+ break;
+ }
+ BPF_LOCK();
+ error = bpf_setif(d, (struct ifreq *)addr);
+ BPF_UNLOCK();
+ break;
+ }
+
/*
* Set read timeout.
*/
case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCSRTIMEOUT32:
#endif
{
struct timeval *tv = (struct timeval *)addr;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
struct timeval32 *tv32;
struct timeval tv64;
@@ -1449,12 +1471,12 @@
* Get read timeout.
*/
case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCGRTIMEOUT32:
#endif
{
struct timeval *tv;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
struct timeval32 *tv32;
struct timeval tv64;
@@ -1466,7 +1488,7 @@
tv->tv_sec = d->bd_rtout / hz;
tv->tv_usec = (d->bd_rtout % hz) * tick;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
if (cmd == BIOCGRTIMEOUT32) {
tv32 = (struct timeval32 *)addr;
tv32->tv_sec = tv->tv_sec;
@@ -1859,10 +1881,8 @@
BPFIF_RUNLOCK(bp);
/*
- * Behavior here depends on the buffering model. If we're using
- * kernel memory buffers, then we can allocate them here. If we're
- * using zero-copy, then the user process must have registered
- * buffers by the time we get here. If not, return an error.
+ * At this point, we expect the buffer is already allocated. If not,
+ * return an error.
*/
switch (d->bd_bufmode) {
case BPF_BUFMODE_BUFFER:
@@ -1965,7 +1985,10 @@
ready = bpf_ready(d);
if (ready) {
kn->kn_data = d->bd_slen;
- if (d->bd_hbuf)
+ /*
+ * Ignore the hold buffer if it is being copied to user space.
+ */
+ if (!d->bd_hbuf_in_use && d->bd_hbuf)
kn->kn_data += d->bd_hlen;
} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
callout_reset(&d->bd_callout, d->bd_rtout,
@@ -2339,6 +2362,7 @@
++d->bd_dcount;
return;
}
+ KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
@@ -2477,7 +2501,7 @@
bp->bif_hdrlen = hdrlen;
- if (bootverbose)
+ if (bootverbose && IS_DEFAULT_VNET(curvnet))
if_printf(ifp, "bpf attached\n");
}
@@ -2489,52 +2513,51 @@
void
bpfdetach(struct ifnet *ifp)
{
- struct bpf_if *bp;
+ struct bpf_if *bp, *bp_temp;
struct bpf_d *d;
-#ifdef INVARIANTS
int ndetached;
ndetached = 0;
-#endif
BPF_LOCK();
/* Find all bpf_if struct's which reference ifp and detach them. */
- do {
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- if (ifp == bp->bif_ifp)
- break;
- }
- if (bp != NULL)
- LIST_REMOVE(bp, bif_next);
+ LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
+ if (ifp != bp->bif_ifp)
+ continue;
- if (bp != NULL) {
-#ifdef INVARIANTS
- ndetached++;
-#endif
- while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
- }
- /* Free writer-only descriptors */
- while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
- }
+ LIST_REMOVE(bp, bif_next);
+ /* Add to to-be-freed list */
+ LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
- /*
- * Delay freing bp till interface is detached
- * and all routes through this interface are removed.
- * Mark bp as detached to restrict new consumers.
- */
- BPFIF_WLOCK(bp);
- bp->flags |= BPFIF_FLAG_DYING;
- BPFIF_WUNLOCK(bp);
+ ndetached++;
+ /*
+ * Delay freeing bp till interface is detached
+ * and all routes through this interface are removed.
+ * Mark bp as detached to restrict new consumers.
+ */
+ BPFIF_WLOCK(bp);
+ bp->flags |= BPFIF_FLAG_DYING;
+ BPFIF_WUNLOCK(bp);
+
+ CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
+ __func__, bp->bif_dlt, bp, ifp);
+
+ /* Free common descriptors */
+ while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
}
- } while (bp != NULL);
+
+ /* Free writer-only descriptors */
+ while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
+ }
+ }
BPF_UNLOCK();
#ifdef INVARIANTS
@@ -2546,32 +2569,50 @@
/*
* Interface departure handler.
* Note departure event does not guarantee interface is going down.
+ * Interface renaming is currently done via departure/arrival event set.
+ *
+ * Departure handled is called after all routes pointing to
+ * given interface are removed and interface is in down state
+ * restricting any packets to be sent/received. We assume it is now safe
+ * to free data allocated by BPF.
*/
static void
bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
{
- struct bpf_if *bp;
+ struct bpf_if *bp, *bp_temp;
+ int nmatched = 0;
- BPF_LOCK();
- if ((bp = ifp->if_bpf) == NULL) {
- BPF_UNLOCK();
+ /* Ignore ifnet renaming. */
+ if (ifp->if_flags & IFF_RENAMING)
return;
- }
- /* Check if bpfdetach() was called previously */
- if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
- BPF_UNLOCK();
- return;
- }
+ BPF_LOCK();
+ /*
+ * Find matching entries in free list.
+ * Nothing should be found if bpfdetach() was not called.
+ */
+ LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
+ if (ifp != bp->bif_ifp)
+ continue;
- CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
- __func__, bp, ifp);
+ CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
+ __func__, bp, ifp);
- ifp->if_bpf = NULL;
+ LIST_REMOVE(bp, bif_next);
+
+ rw_destroy(&bp->bif_lock);
+ free(bp, M_BPF);
+
+ nmatched++;
+ }
BPF_UNLOCK();
- rw_destroy(&bp->bif_lock);
- free(bp, M_BPF);
+ /*
+ * Note that we cannot zero other pointers to
+ * custom DLTs possibly used by given interface.
+ */
+ if (nmatched != 0)
+ ifp->if_bpf = NULL;
}
/*
@@ -2580,26 +2621,44 @@
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
- int n, error;
struct ifnet *ifp;
struct bpf_if *bp;
+ u_int *lst;
+ int error, n, n1;
BPF_LOCK_ASSERT();
ifp = d->bd_bif->bif_ifp;
+again:
+ n1 = 0;
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (bp->bif_ifp == ifp)
+ n1++;
+ }
+ if (bfl->bfl_list == NULL) {
+ bfl->bfl_len = n1;
+ return (0);
+ }
+ if (n1 > bfl->bfl_len)
+ return (ENOMEM);
+ BPF_UNLOCK();
+ lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
n = 0;
- error = 0;
+ BPF_LOCK();
LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp != ifp)
continue;
- if (bfl->bfl_list != NULL) {
- if (n >= bfl->bfl_len)
- return (ENOMEM);
- error = copyout(&bp->bif_dlt,
- bfl->bfl_list + n, sizeof(u_int));
+ if (n >= n1) {
+ free(lst, M_TEMP);
+ goto again;
}
+ lst[n] = bp->bif_dlt;
n++;
}
+ BPF_UNLOCK();
+ error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
+ free(lst, M_TEMP);
+ BPF_LOCK();
bfl->bfl_len = n;
return (error);
}
@@ -2651,6 +2710,7 @@
mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
LIST_INIT(&bpf_iflist);
+ LIST_INIT(&bpf_freelist);
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
/* For compatibility */
@@ -2732,7 +2792,8 @@
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
- struct xbpf_d *xbdbuf, *xbd, zerostats;
+ static const struct xbpf_d zerostats;
+ struct xbpf_d *xbdbuf, *xbd, tempstats;
int index, error;
struct bpf_if *bp;
struct bpf_d *bd;
@@ -2752,11 +2813,13 @@
* as we aren't allowing the user to set the counters currently.
*/
if (req->newptr != NULL) {
- if (req->newlen != sizeof(zerostats))
+ if (req->newlen != sizeof(tempstats))
return (EINVAL);
- bzero(&zerostats, sizeof(zerostats));
- xbd = req->newptr;
- if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
+ memset(&tempstats, 0, sizeof(tempstats));
+ error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
+ if (error)
+ return (error);
+ if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
return (EINVAL);
bpf_zero_counters();
return (0);
Modified: trunk/sys/net/bpf.h
===================================================================
--- trunk/sys/net/bpf.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
* @(#)bpf.h 8.1 (Berkeley) 6/10/93
* @(#)bpf.h 1.34 (LBL) 6/16/96
*
- * $FreeBSD: stable/9/sys/net/bpf.h 247629 2013-03-02 15:11:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/bpf.h 286856 2015-08-17 18:43:39Z loos $
*/
#ifndef _NET_BPF_H_
@@ -1235,8 +1235,9 @@
/*
* Rotate the packet buffers in descriptor d. Move the store buffer into the
- * hold slot, and the free buffer ino the store slot. Zero the length of the
- * new store buffer. Descriptor lock should be held.
+ * hold slot, and the free buffer into the store slot. Zero the length of the
+ * new store buffer. Descriptor lock should be held. One must be careful to
+ * not rotate the buffers twice, i.e. if fbuf != NULL.
*/
#define ROTATE_BUFFERS(d) do { \
(d)->bd_hbuf = (d)->bd_sbuf; \
Modified: trunk/sys/net/bpf_buffer.c
===================================================================
--- trunk/sys/net/bpf_buffer.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_buffer.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -63,7 +63,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_buffer.c 247629 2013-03-02 15:11:20Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_buffer.c 286856 2015-08-17 18:43:39Z loos $");
#include "opt_bpf.h"
Modified: trunk/sys/net/bpf_buffer.h
===================================================================
--- trunk/sys/net/bpf_buffer.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_buffer.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/bpf_buffer.h 247629 2013-03-02 15:11:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/bpf_buffer.h 235746 2012-05-21 22:19:19Z melifaro $
*/
#ifndef _NET_BPF_BUFFER_H_
Modified: trunk/sys/net/bpf_filter.c
===================================================================
--- trunk/sys/net/bpf_filter.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_filter.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,10 +36,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_filter.c 224044 2011-07-14 21:06:22Z mp $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_filter.c 264457 2014-04-14 13:30:08Z jmmv $");
#include <sys/param.h>
+#if !defined(_KERNEL)
+#include <strings.h>
+#endif
#if !defined(_KERNEL) || defined(sun)
#include <netinet/in.h>
#endif
Modified: trunk/sys/net/bpf_jitter.c
===================================================================
--- trunk/sys/net/bpf_jitter.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_jitter.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_jitter.c 199615 2009-11-20 21:12:40Z jkim $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_jitter.c 199615 2009-11-20 21:12:40Z jkim $");
#ifdef _KERNEL
#include "opt_bpf.h"
Modified: trunk/sys/net/bpf_jitter.h
===================================================================
--- trunk/sys/net/bpf_jitter.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_jitter.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/bpf_jitter.h 199603 2009-11-20 18:49:20Z jkim $
+ * $FreeBSD: stable/10/sys/net/bpf_jitter.h 199603 2009-11-20 18:49:20Z jkim $
*/
#ifndef _NET_BPF_JITTER_H_
Modified: trunk/sys/net/bpf_zerocopy.c
===================================================================
--- trunk/sys/net/bpf_zerocopy.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_zerocopy.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bpf_zerocopy.c 240238 2012-09-08 16:40:18Z kib $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bpf_zerocopy.c 239065 2012-08-05 14:11:42Z kib $");
#include "opt_bpf.h"
Modified: trunk/sys/net/bpf_zerocopy.h
===================================================================
--- trunk/sys/net/bpf_zerocopy.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpf_zerocopy.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/bpf_zerocopy.h 234969 2012-05-03 16:48:48Z eadler $
+ * $FreeBSD: stable/10/sys/net/bpf_zerocopy.h 230108 2012-01-14 17:07:52Z eadler $
*/
#ifndef _NET_BPF_ZEROCOPY_H_
Modified: trunk/sys/net/bpfdesc.h
===================================================================
--- trunk/sys/net/bpfdesc.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bpfdesc.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -34,7 +34,7 @@
*
* @(#)bpfdesc.h 8.1 (Berkeley) 6/10/93
*
- * $FreeBSD: stable/9/sys/net/bpfdesc.h 247629 2013-03-02 15:11:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/bpfdesc.h 244090 2012-12-10 16:14:44Z ghelmer $
*/
#ifndef _NET_BPFDESC_H_
@@ -64,6 +64,7 @@
caddr_t bd_sbuf; /* store slot */
caddr_t bd_hbuf; /* hold slot */
caddr_t bd_fbuf; /* free slot */
+ int bd_hbuf_in_use; /* don't rotate buffers */
int bd_slen; /* current length of store buffer */
int bd_hlen; /* current length of hold buffer */
Modified: trunk/sys/net/bridgestp.c
===================================================================
--- trunk/sys/net/bridgestp.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bridgestp.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/bridgestp.c 236052 2012-05-26 07:43:17Z thompsa $");
+__FBSDID("$FreeBSD: stable/10/sys/net/bridgestp.c 248324 2013-03-15 12:55:30Z glebius $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -235,7 +235,7 @@
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
return;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
@@ -349,7 +349,7 @@
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
return;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
Modified: trunk/sys/net/bridgestp.h
===================================================================
--- trunk/sys/net/bridgestp.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/bridgestp.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -68,7 +68,7 @@
*
* OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
*
- * $FreeBSD: stable/9/sys/net/bridgestp.h 236052 2012-05-26 07:43:17Z thompsa $
+ * $FreeBSD: stable/10/sys/net/bridgestp.h 234488 2012-04-20 10:06:28Z thompsa $
*/
/*
Modified: trunk/sys/net/ethernet.h
===================================================================
--- trunk/sys/net/ethernet.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ethernet.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -2,7 +2,7 @@
/*
* Fundamental constants relating to ethernet.
*
- * $FreeBSD: stable/9/sys/net/ethernet.h 191148 2009-04-16 20:30:28Z kmacy $
+ * $FreeBSD: stable/10/sys/net/ethernet.h 321752 2017-07-31 03:49:08Z sephe $
*
*/
@@ -315,6 +315,7 @@
#define ETHERTYPE_SLOW 0x8809 /* 802.3ad link aggregation (LACP) */
#define ETHERTYPE_PPP 0x880B /* PPP (obsolete by PPPoE) */
#define ETHERTYPE_HITACHI 0x8820 /* Hitachi Cable (Optoelectronic Systems Laboratory) */
+#define ETHERTYPE_TEST 0x8822 /* Network Conformance Testing */
#define ETHERTYPE_MPLS 0x8847 /* MPLS Unicast */
#define ETHERTYPE_MPLS_MCAST 0x8848 /* MPLS Multicast */
#define ETHERTYPE_AXIS 0x8856 /* Axis Communications AB proprietary bootstrap/config */
@@ -376,8 +377,8 @@
extern void ether_ifattach(struct ifnet *, const u_int8_t *);
extern void ether_ifdetach(struct ifnet *);
extern int ether_ioctl(struct ifnet *, u_long, caddr_t);
-extern int ether_output(struct ifnet *,
- struct mbuf *, struct sockaddr *, struct route *);
+extern int ether_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
extern int ether_output_frame(struct ifnet *, struct mbuf *);
extern char *ether_sprintf(const u_int8_t *);
void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
@@ -384,6 +385,12 @@
void *, u_int);
struct mbuf *ether_vlanencap(struct mbuf *, uint16_t);
+#ifdef SYS_EVENTHANDLER_H
+/* new ethernet interface attached event */
+typedef void (*ether_ifattach_event_handler_t)(void *, struct ifnet *);
+EVENTHANDLER_DECLARE(ether_ifattach_event, ether_ifattach_event_handler_t);
+#endif
+
#else /* _KERNEL */
#include <sys/cdefs.h>
Modified: trunk/sys/net/fddi.h
===================================================================
--- trunk/sys/net/fddi.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/fddi.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)if_fddi.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/fddi.h 194581 2009-06-21 10:29:31Z rdivacky $
+ * $FreeBSD: stable/10/sys/net/fddi.h 194581 2009-06-21 10:29:31Z rdivacky $
*/
#ifndef _NETINET_IF_FDDI_H_
Modified: trunk/sys/net/firewire.h
===================================================================
--- trunk/sys/net/firewire.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/firewire.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/firewire.h 194581 2009-06-21 10:29:31Z rdivacky $
+ * $FreeBSD: stable/10/sys/net/firewire.h 194581 2009-06-21 10:29:31Z rdivacky $
*/
#ifndef _NET_FIREWIRE_H_
Modified: trunk/sys/net/flowtable.c
===================================================================
--- trunk/sys/net/flowtable.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/flowtable.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,33 +1,32 @@
/* $MidnightBSD$ */
-/**************************************************************************
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius at FreeBSD.org>
+ * Copyright (c) 2008-2010, BitGravity Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the BitGravity Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
-Copyright (c) 2008-2010, BitGravity Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the BitGravity Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
#include "opt_route.h"
#include "opt_mpath.h"
#include "opt_ddb.h"
@@ -35,19 +34,22 @@
#include "opt_inet6.h"
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/flowtable.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/flowtable.c 281955 2015-04-24 23:26:44Z hiren $");
-#include <sys/param.h>
+#include <sys/param.h>
#include <sys/types.h>
#include <sys/bitstring.h>
#include <sys/condvar.h>
#include <sys/callout.h>
-#include <sys/kernel.h>
+#include <sys/hash.h>
+#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/pcpu.h>
#include <sys/proc.h>
+#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
@@ -54,11 +56,12 @@
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
+#include <vm/uma.h>
#include <net/if.h>
#include <net/if_llatbl.h>
#include <net/if_var.h>
-#include <net/route.h>
+#include <net/route.h>
#include <net/flowtable.h>
#include <net/vnet.h>
@@ -70,157 +73,79 @@
#ifdef INET6
#include <netinet/ip6.h>
#endif
+#ifdef FLOWTABLE_HASH_ALL
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/sctp.h>
+#endif
-#include <libkern/jenkins.h>
#include <ddb/ddb.h>
-struct ipv4_tuple {
- uint16_t ip_sport; /* source port */
- uint16_t ip_dport; /* destination port */
- in_addr_t ip_saddr; /* source address */
- in_addr_t ip_daddr; /* destination address */
-};
+#ifdef FLOWTABLE_HASH_ALL
+#define KEY_PORTS (sizeof(uint16_t) * 2)
+#define KEY_ADDRS 2
+#else
+#define KEY_PORTS 0
+#define KEY_ADDRS 1
+#endif
-union ipv4_flow {
- struct ipv4_tuple ipf_ipt;
- uint32_t ipf_key[3];
-};
+#ifdef INET6
+#define KEY_ADDR_LEN sizeof(struct in6_addr)
+#else
+#define KEY_ADDR_LEN sizeof(struct in_addr)
+#endif
-struct ipv6_tuple {
- uint16_t ip_sport; /* source port */
- uint16_t ip_dport; /* destination port */
- struct in6_addr ip_saddr; /* source address */
- struct in6_addr ip_daddr; /* destination address */
-};
+#define KEYLEN ((KEY_ADDR_LEN * KEY_ADDRS + KEY_PORTS) / sizeof(uint32_t))
-union ipv6_flow {
- struct ipv6_tuple ipf_ipt;
- uint32_t ipf_key[9];
-};
-
struct flentry {
- volatile uint32_t f_fhash; /* hash flowing forward */
- uint16_t f_flags; /* flow flags */
- uint8_t f_pad;
+ uint32_t f_hash; /* hash flowing forward */
+ uint32_t f_key[KEYLEN]; /* address(es and ports) */
+ uint32_t f_uptime; /* uptime at last access */
+ uint16_t f_fibnum; /* fib index */
+#ifdef FLOWTABLE_HASH_ALL
uint8_t f_proto; /* protocol */
- uint32_t f_fibnum; /* fib index */
- uint32_t f_uptime; /* uptime at last access */
- struct flentry *f_next; /* pointer to collision entry */
- volatile struct rtentry *f_rt; /* rtentry for flow */
- volatile struct llentry *f_lle; /* llentry for flow */
+ uint8_t f_flags; /* stale? */
+#define FL_STALE 1
+#endif
+ SLIST_ENTRY(flentry) f_next; /* pointer to collision entry */
+ struct rtentry *f_rt; /* rtentry for flow */
+ struct llentry *f_lle; /* llentry for flow */
};
+#undef KEYLEN
-struct flentry_v4 {
- struct flentry fl_entry;
- union ipv4_flow fl_flow;
-};
+SLIST_HEAD(flist, flentry);
+/* Make sure we can use pcpu_zone_ptr for struct flist. */
+CTASSERT(sizeof(struct flist) == sizeof(void *));
-struct flentry_v6 {
- struct flentry fl_entry;
- union ipv6_flow fl_flow;
-};
-
-#define fl_fhash fl_entry.fl_fhash
-#define fl_flags fl_entry.fl_flags
-#define fl_proto fl_entry.fl_proto
-#define fl_uptime fl_entry.fl_uptime
-#define fl_rt fl_entry.fl_rt
-#define fl_lle fl_entry.fl_lle
-
-#define SECS_PER_HOUR 3600
-#define SECS_PER_DAY (24*SECS_PER_HOUR)
-
-#define SYN_IDLE 300
-#define UDP_IDLE 300
-#define FIN_WAIT_IDLE 600
-#define TCP_IDLE SECS_PER_DAY
-
-
-typedef void fl_lock_t(struct flowtable *, uint32_t);
-typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
-
-union flentryp {
- struct flentry **global;
- struct flentry **pcpu[MAXCPU];
-};
-
-struct flowtable_stats {
- uint64_t ft_collisions;
- uint64_t ft_allocated;
- uint64_t ft_misses;
- uint64_t ft_max_depth;
- uint64_t ft_free_checks;
- uint64_t ft_frees;
- uint64_t ft_hits;
- uint64_t ft_lookups;
-} __aligned(CACHE_LINE_SIZE);
-
struct flowtable {
- struct flowtable_stats ft_stats[MAXCPU];
+ counter_u64_t *ft_stat;
int ft_size;
- int ft_lock_count;
- uint32_t ft_flags;
- char *ft_name;
- fl_lock_t *ft_lock;
- fl_lock_t *ft_unlock;
- fl_rtalloc_t *ft_rtalloc;
/*
- * XXX need to pad out
- */
- struct mtx *ft_locks;
- union flentryp ft_table;
- bitstr_t *ft_masks[MAXCPU];
+ * ft_table is a malloc(9)ed array of pointers. Pointers point to
+ * memory from UMA_ZONE_PCPU zone.
+ * ft_masks is per-cpu pointer itself. Each instance points
+ * to a malloc(9)ed bitset, that is private to corresponding CPU.
+ */
+ struct flist **ft_table;
+ bitstr_t **ft_masks;
bitstr_t *ft_tmpmask;
- struct flowtable *ft_next;
+};
- uint32_t ft_count __aligned(CACHE_LINE_SIZE);
- uint32_t ft_udp_idle __aligned(CACHE_LINE_SIZE);
- uint32_t ft_fin_wait_idle;
- uint32_t ft_syn_idle;
- uint32_t ft_tcp_idle;
- boolean_t ft_full;
-} __aligned(CACHE_LINE_SIZE);
+#define FLOWSTAT_ADD(ft, name, v) \
+ counter_u64_add((ft)->ft_stat[offsetof(struct flowtable_stat, name) / sizeof(uint64_t)], (v))
+#define FLOWSTAT_INC(ft, name) FLOWSTAT_ADD(ft, name, 1)
static struct proc *flowcleanerproc;
-static VNET_DEFINE(struct flowtable *, flow_list_head);
-static VNET_DEFINE(uint32_t, flow_hashjitter);
-static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
-static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
+static uint32_t flow_hashjitter;
-#define V_flow_list_head VNET(flow_list_head)
-#define V_flow_hashjitter VNET(flow_hashjitter)
-#define V_flow_ipv4_zone VNET(flow_ipv4_zone)
-#define V_flow_ipv6_zone VNET(flow_ipv6_zone)
-
-
static struct cv flowclean_f_cv;
static struct cv flowclean_c_cv;
static struct mtx flowclean_lock;
static uint32_t flowclean_cycles;
-static uint32_t flowclean_freq;
-#ifdef FLOWTABLE_DEBUG
-#define FLDPRINTF(ft, flags, fmt, ...) \
-do { \
- if ((ft)->ft_flags & (flags)) \
- printf((fmt), __VA_ARGS__); \
-} while (0); \
-
-#else
-#define FLDPRINTF(ft, flags, fmt, ...)
-
-#endif
-
-
/*
* TODO:
- * - Make flowtable stats per-cpu, aggregated at sysctl call time,
- * to avoid extra cache evictions caused by incrementing a shared
- * counter
- * - add sysctls to resize && flush flow tables
+ * - add sysctls to resize && flush flow tables
* - Add per flowtable sysctls for statistics and configuring timeouts
* - add saturation counter to rtentry to support per-packet load-balancing
* add flag to indicate round-robin flow, add list lookup from head
@@ -231,396 +156,117 @@
* - support explicit connection state (currently only ad-hoc for DSR)
* - idetach() cleanup for options VIMAGE builds.
*/
-VNET_DEFINE(int, flowtable_enable) = 1;
-static VNET_DEFINE(int, flowtable_debug);
-static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
-static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
-static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
-static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
-static VNET_DEFINE(int, flowtable_nmbflows);
-static VNET_DEFINE(int, flowtable_ready) = 0;
+#ifdef INET
+static VNET_DEFINE(struct flowtable, ip4_ft);
+#define V_ip4_ft VNET(ip4_ft)
+#endif
+#ifdef INET6
+static VNET_DEFINE(struct flowtable, ip6_ft);
+#define V_ip6_ft VNET(ip6_ft)
+#endif
+static uma_zone_t flow_zone;
+
+static VNET_DEFINE(int, flowtable_enable) = 1;
#define V_flowtable_enable VNET(flowtable_enable)
-#define V_flowtable_debug VNET(flowtable_debug)
-#define V_flowtable_syn_expire VNET(flowtable_syn_expire)
-#define V_flowtable_udp_expire VNET(flowtable_udp_expire)
-#define V_flowtable_fin_wait_expire VNET(flowtable_fin_wait_expire)
-#define V_flowtable_tcp_expire VNET(flowtable_tcp_expire)
-#define V_flowtable_nmbflows VNET(flowtable_nmbflows)
-#define V_flowtable_ready VNET(flowtable_ready)
-static SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
+static SYSCTL_NODE(_net, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
"flowtable");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
- &VNET_NAME(flowtable_debug), 0, "print debug info.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, enable, CTLFLAG_RW,
&VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
+SYSCTL_UMA_MAX(_net_flowtable, OID_AUTO, maxflows, CTLFLAG_RW,
+ &flow_zone, "Maximum number of flows allowed");
-/*
- * XXX This does not end up updating timeouts at runtime
- * and only reflects the value for the last table added :-/
- */
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
- &VNET_NAME(flowtable_syn_expire), 0,
- "seconds after which to remove syn allocated flow.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
- &VNET_NAME(flowtable_udp_expire), 0,
- "seconds after which to remove flow allocated to UDP.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
- &VNET_NAME(flowtable_fin_wait_expire), 0,
- "seconds after which to remove a flow in FIN_WAIT.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
- &VNET_NAME(flowtable_tcp_expire), 0,
- "seconds after which to remove flow allocated to a TCP connection.");
+static MALLOC_DEFINE(M_FTABLE, "flowtable", "flowtable hashes and bitstrings");
+static struct flentry *
+flowtable_lookup_common(struct flowtable *, uint32_t *, int, uint32_t);
-/*
- * Maximum number of flows that can be allocated of a given type.
- *
- * The table is allocated at boot time (for the pure caching case
- * there is no reason why this could not be changed at runtime)
- * and thus (currently) needs to be set with a tunable.
- */
-static int
-sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
-{
- int error, newnmbflows;
-
- newnmbflows = V_flowtable_nmbflows;
- error = sysctl_handle_int(oidp, &newnmbflows, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbflows > V_flowtable_nmbflows) {
- V_flowtable_nmbflows = newnmbflows;
- uma_zone_set_max(V_flow_ipv4_zone,
- V_flowtable_nmbflows);
- uma_zone_set_max(V_flow_ipv6_zone,
- V_flowtable_nmbflows);
- } else
- error = EINVAL;
- }
- return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
- CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
- "Maximum number of flows allowed");
-
-
-
-#define FS_PRINT(sb, field) sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
-
-static void
-fs_print(struct sbuf *sb, struct flowtable_stats *fs)
-{
-
- FS_PRINT(sb, collisions);
- FS_PRINT(sb, allocated);
- FS_PRINT(sb, misses);
- FS_PRINT(sb, max_depth);
- FS_PRINT(sb, free_checks);
- FS_PRINT(sb, frees);
- FS_PRINT(sb, hits);
- FS_PRINT(sb, lookups);
-}
-
-static void
-flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
-{
- int i;
- struct flowtable_stats fs, *pfs;
-
- if (ft->ft_flags & FL_PCPU) {
- bzero(&fs, sizeof(fs));
- pfs = &fs;
- CPU_FOREACH(i) {
- pfs->ft_collisions += ft->ft_stats[i].ft_collisions;
- pfs->ft_allocated += ft->ft_stats[i].ft_allocated;
- pfs->ft_misses += ft->ft_stats[i].ft_misses;
- pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
- pfs->ft_frees += ft->ft_stats[i].ft_frees;
- pfs->ft_hits += ft->ft_stats[i].ft_hits;
- pfs->ft_lookups += ft->ft_stats[i].ft_lookups;
- if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
- pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
- }
- } else {
- pfs = &ft->ft_stats[0];
- }
- fs_print(sb, pfs);
-}
-
-static int
-sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
-{
- struct flowtable *ft;
- struct sbuf *sb;
- int error;
-
- sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
-
- ft = V_flow_list_head;
- while (ft != NULL) {
- sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
- flowtable_show_stats(sb, ft);
- ft = ft->ft_next;
- }
- sbuf_finish(sb);
- error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
- sbuf_delete(sb);
-
- return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
- NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
-
-
-#ifndef RADIX_MPATH
-static void
-rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
-{
-
- rtalloc_ign_fib(ro, 0, fibnum);
-}
-#endif
-
-static void
-flowtable_global_lock(struct flowtable *table, uint32_t hash)
-{
- int lock_index = (hash)&(table->ft_lock_count - 1);
-
- mtx_lock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_global_unlock(struct flowtable *table, uint32_t hash)
-{
- int lock_index = (hash)&(table->ft_lock_count - 1);
-
- mtx_unlock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
-{
-
- critical_enter();
-}
-
-static void
-flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
-{
-
- critical_exit();
-}
-
-#define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
-#define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
-#define FL_ENTRY_LOCK(table, hash) (table)->ft_lock((table), (hash))
-#define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
-
-#define FL_STALE (1<<8)
-#define FL_OVERWRITE (1<<10)
-
-void
-flow_invalidate(struct flentry *fle)
-{
-
- fle->f_flags |= FL_STALE;
-}
-
-static __inline int
-proto_to_flags(uint8_t proto)
-{
- int flag;
-
- switch (proto) {
- case IPPROTO_TCP:
- flag = FL_TCP;
- break;
- case IPPROTO_SCTP:
- flag = FL_SCTP;
- break;
- case IPPROTO_UDP:
- flag = FL_UDP;
- break;
- default:
- flag = 0;
- break;
- }
-
- return (flag);
-}
-
-static __inline int
-flags_to_proto(int flags)
-{
- int proto, protoflags;
-
- protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
- switch (protoflags) {
- case FL_TCP:
- proto = IPPROTO_TCP;
- break;
- case FL_SCTP:
- proto = IPPROTO_SCTP;
- break;
- case FL_UDP:
- proto = IPPROTO_UDP;
- break;
- default:
- proto = 0;
- break;
- }
- return (proto);
-}
-
#ifdef INET
-#ifdef FLOWTABLE_DEBUG
-static void
-ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
- struct sockaddr_in *dsin)
+static struct flentry *
+flowtable_lookup_ipv4(struct mbuf *m, struct route *ro)
{
- char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
-
- if (flags & FL_HASH_ALL) {
- inet_ntoa_r(ssin->sin_addr, saddr);
- inet_ntoa_r(dsin->sin_addr, daddr);
- printf("proto=%d %s:%d->%s:%d\n",
- proto, saddr, ntohs(ssin->sin_port), daddr,
- ntohs(dsin->sin_port));
- } else {
- inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
- printf("proto=%d %s\n", proto, daddr);
- }
-
-}
-#endif
-
-static int
-ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
- struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
-{
+ struct flentry *fle;
+ struct sockaddr_in *sin;
struct ip *ip;
- uint8_t proto;
+ uint32_t fibnum;
+#ifdef FLOWTABLE_HASH_ALL
+ uint32_t key[3];
int iphlen;
- struct tcphdr *th;
- struct udphdr *uh;
- struct sctphdr *sh;
uint16_t sport, dport;
+ uint8_t proto;
+#endif
- proto = sport = dport = 0;
ip = mtod(m, struct ip *);
- dsin->sin_family = AF_INET;
- dsin->sin_len = sizeof(*dsin);
- dsin->sin_addr = ip->ip_dst;
- ssin->sin_family = AF_INET;
- ssin->sin_len = sizeof(*ssin);
- ssin->sin_addr = ip->ip_src;
+ if (ip->ip_src.s_addr == ip->ip_dst.s_addr ||
+ (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+ (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
+ return (NULL);
+
+ fibnum = M_GETFIB(m);
+
+#ifdef FLOWTABLE_HASH_ALL
+ iphlen = ip->ip_hl << 2;
proto = ip->ip_p;
- if ((*flags & FL_HASH_ALL) == 0) {
- FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
- *flags);
- goto skipports;
- }
- iphlen = ip->ip_hl << 2; /* XXX options? */
+ switch (proto) {
+ case IPPROTO_TCP: {
+ struct tcphdr *th;
- switch (proto) {
- case IPPROTO_TCP:
- th = (struct tcphdr *)((caddr_t)ip + iphlen);
+ th = (struct tcphdr *)((char *)ip + iphlen);
sport = th->th_sport;
dport = th->th_dport;
- if ((*flags & FL_HASH_ALL) &&
- (th->th_flags & (TH_RST|TH_FIN)))
- *flags |= FL_STALE;
- break;
- case IPPROTO_UDP:
- uh = (struct udphdr *)((caddr_t)ip + iphlen);
+ if (th->th_flags & (TH_RST|TH_FIN))
+ fibnum |= (FL_STALE << 24);
+ break;
+ }
+ case IPPROTO_UDP: {
+ struct udphdr *uh;
+
+ uh = (struct udphdr *)((char *)ip + iphlen);
sport = uh->uh_sport;
dport = uh->uh_dport;
- break;
- case IPPROTO_SCTP:
- sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+ break;
+ }
+ case IPPROTO_SCTP: {
+ struct sctphdr *sh;
+
+ sh = (struct sctphdr *)((char *)ip + iphlen);
sport = sh->src_port;
dport = sh->dest_port;
- break;
+ /* XXXGL: handle stale? */
+ break;
+ }
default:
- FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
- return (ENOTSUP);
- /* no port - hence not a protocol we care about */
+ sport = dport = 0;
break;
-
}
-skipports:
- *flags |= proto_to_flags(proto);
- ssin->sin_port = sport;
- dsin->sin_port = dport;
- return (0);
-}
+ key[0] = ip->ip_dst.s_addr;
+ key[1] = ip->ip_src.s_addr;
+ key[2] = (dport << 16) | sport;
+ fibnum |= proto << 16;
-static uint32_t
-ipv4_flow_lookup_hash_internal(
- struct sockaddr_in *ssin, struct sockaddr_in *dsin,
- uint32_t *key, uint16_t flags)
-{
- uint16_t sport, dport;
- uint8_t proto;
- int offset = 0;
+ fle = flowtable_lookup_common(&V_ip4_ft, key, 3 * sizeof(uint32_t),
+ fibnum);
- if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
- return (0);
- proto = flags_to_proto(flags);
- sport = dport = key[2] = key[1] = key[0] = 0;
- if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
- key[1] = ssin->sin_addr.s_addr;
- sport = ssin->sin_port;
- }
- if (dsin != NULL) {
- key[2] = dsin->sin_addr.s_addr;
- dport = dsin->sin_port;
- }
- if (flags & FL_HASH_ALL) {
- ((uint16_t *)key)[0] = sport;
- ((uint16_t *)key)[1] = dport;
- } else
- offset = V_flow_hashjitter + proto;
+#else /* !FLOWTABLE_HASH_ALL */
- return (jenkins_hashword(key, 3, offset));
-}
+ fle = flowtable_lookup_common(&V_ip4_ft, (uint32_t *)&ip->ip_dst,
+ sizeof(struct in_addr), fibnum);
-static struct flentry *
-flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
-{
- struct sockaddr_storage ssa, dsa;
- uint16_t flags;
- struct sockaddr_in *dsin, *ssin;
+#endif /* FLOWTABLE_HASH_ALL */
- dsin = (struct sockaddr_in *)&dsa;
- ssin = (struct sockaddr_in *)&ssa;
- bzero(dsin, sizeof(*dsin));
- bzero(ssin, sizeof(*ssin));
- flags = ft->ft_flags;
- if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
+ if (fle == NULL)
return (NULL);
- return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
-}
-
-void
-flow_to_route(struct flentry *fle, struct route *ro)
-{
- uint32_t *hashkey = NULL;
- struct sockaddr_in *sin;
-
sin = (struct sockaddr_in *)&ro->ro_dst;
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
- sin->sin_addr.s_addr = hashkey[2];
- ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
- ro->ro_flags |= RT_NORTREF;
+ sin->sin_addr = ip->ip_dst;
+
+ return (fle);
}
#endif /* INET */
@@ -634,9 +280,8 @@
#define PULLUP_TO(_len, p, T) \
do { \
int x = (_len) + sizeof(T); \
- if ((m)->m_len < x) { \
- goto receive_failed; \
- } \
+ if ((m)->m_len < x) \
+ return (NULL); \
p = (mtod(m, char *) + (_len)); \
} while (0)
@@ -644,26 +289,35 @@
#define SCTP(p) ((struct sctphdr *)(p))
#define UDP(p) ((struct udphdr *)(p))
-static int
-ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
- struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
+static struct flentry *
+flowtable_lookup_ipv6(struct mbuf *m, struct route *ro)
{
+ struct flentry *fle;
+ struct sockaddr_in6 *sin6;
struct ip6_hdr *ip6;
- uint8_t proto;
+ uint32_t fibnum;
+#ifdef FLOWTABLE_HASH_ALL
+ uint32_t key[9];
+ void *ulp;
int hlen;
- uint16_t src_port, dst_port;
+ uint16_t sport, dport;
u_short offset;
- void *ulp;
+ uint8_t proto;
+#else
+ uint32_t key[4];
+#endif
- offset = hlen = src_port = dst_port = 0;
- ulp = NULL;
ip6 = mtod(m, struct ip6_hdr *);
+ if (in6_localaddr(&ip6->ip6_dst))
+ return (NULL);
+
+ fibnum = M_GETFIB(m);
+
+#ifdef FLOWTABLE_HASH_ALL
hlen = sizeof(struct ip6_hdr);
proto = ip6->ip6_nxt;
-
- if ((*flags & FL_HASH_ALL) == 0)
- goto skipports;
-
+ offset = sport = dport = 0;
+ ulp = NULL;
while (ulp == NULL) {
switch (proto) {
case IPPROTO_ICMPV6:
@@ -676,21 +330,21 @@
break;
case IPPROTO_TCP:
PULLUP_TO(hlen, ulp, struct tcphdr);
- dst_port = TCP(ulp)->th_dport;
- src_port = TCP(ulp)->th_sport;
- if ((*flags & FL_HASH_ALL) &&
- (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
- *flags |= FL_STALE;
+ dport = TCP(ulp)->th_dport;
+ sport = TCP(ulp)->th_sport;
+ if (TCP(ulp)->th_flags & (TH_RST|TH_FIN))
+ fibnum |= (FL_STALE << 24);
break;
case IPPROTO_SCTP:
PULLUP_TO(hlen, ulp, struct sctphdr);
- src_port = SCTP(ulp)->src_port;
- dst_port = SCTP(ulp)->dest_port;
+ dport = SCTP(ulp)->src_port;
+ sport = SCTP(ulp)->dest_port;
+ /* XXXGL: handle stale? */
break;
case IPPROTO_UDP:
PULLUP_TO(hlen, ulp, struct udphdr);
- dst_port = UDP(ulp)->uh_dport;
- src_port = UDP(ulp)->uh_sport;
+ dport = UDP(ulp)->uh_dport;
+ sport = UDP(ulp)->uh_sport;
break;
case IPPROTO_HOPOPTS: /* RFC 2460 */
PULLUP_TO(hlen, ulp, struct ip6_hbh);
@@ -699,7 +353,7 @@
ulp = NULL;
break;
case IPPROTO_ROUTING: /* RFC 2460 */
- PULLUP_TO(hlen, ulp, struct ip6_rthdr);
+ PULLUP_TO(hlen, ulp, struct ip6_rthdr);
hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
ulp = NULL;
@@ -730,105 +384,28 @@
}
}
- if (src_port == 0) {
- receive_failed:
- return (ENOTSUP);
- }
+ bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
+ bcopy(&ip6->ip6_src, &key[4], sizeof(struct in6_addr));
+ key[8] = (dport << 16) | sport;
+ fibnum |= proto << 16;
-skipports:
- dsin6->sin6_family = AF_INET6;
- dsin6->sin6_len = sizeof(*dsin6);
- dsin6->sin6_port = dst_port;
- memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
+ fle = flowtable_lookup_common(&V_ip6_ft, key, 9 * sizeof(uint32_t),
+ fibnum);
+#else /* !FLOWTABLE_HASH_ALL */
+ bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
+ fle = flowtable_lookup_common(&V_ip6_ft, key, sizeof(struct in6_addr),
+ fibnum);
+#endif /* FLOWTABLE_HASH_ALL */
- ssin6->sin6_family = AF_INET6;
- ssin6->sin6_len = sizeof(*ssin6);
- ssin6->sin6_port = src_port;
- memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
- *flags |= proto_to_flags(proto);
-
- return (0);
-}
-
-#define zero_key(key) \
-do { \
- key[0] = 0; \
- key[1] = 0; \
- key[2] = 0; \
- key[3] = 0; \
- key[4] = 0; \
- key[5] = 0; \
- key[6] = 0; \
- key[7] = 0; \
- key[8] = 0; \
-} while (0)
-
-static uint32_t
-ipv6_flow_lookup_hash_internal(
- struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6,
- uint32_t *key, uint16_t flags)
-{
- uint16_t sport, dport;
- uint8_t proto;
- int offset = 0;
-
- if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
- return (0);
-
- proto = flags_to_proto(flags);
- zero_key(key);
- sport = dport = 0;
- if (dsin6 != NULL) {
- memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
- dport = dsin6->sin6_port;
- }
- if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
- memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
- sport = ssin6->sin6_port;
- }
- if (flags & FL_HASH_ALL) {
- ((uint16_t *)key)[0] = sport;
- ((uint16_t *)key)[1] = dport;
- } else
- offset = V_flow_hashjitter + proto;
-
- return (jenkins_hashword(key, 9, offset));
-}
-
-static struct flentry *
-flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
-{
- struct sockaddr_storage ssa, dsa;
- struct sockaddr_in6 *dsin6, *ssin6;
- uint16_t flags;
-
- dsin6 = (struct sockaddr_in6 *)&dsa;
- ssin6 = (struct sockaddr_in6 *)&ssa;
- bzero(dsin6, sizeof(*dsin6));
- bzero(ssin6, sizeof(*ssin6));
- flags = ft->ft_flags;
-
- if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
+ if (fle == NULL)
return (NULL);
- return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
-}
-
-void
-flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
-{
- uint32_t *hashkey = NULL;
- struct sockaddr_in6 *sin6;
-
sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
-
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
- memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
- ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
- ro->ro_flags |= RT_NORTREF;
+ bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(struct in6_addr));
+
+ return (fle);
}
#endif /* INET6 */
@@ -835,583 +412,368 @@
static bitstr_t *
flowtable_mask(struct flowtable *ft)
{
- bitstr_t *mask;
- if (ft->ft_flags & FL_PCPU)
- mask = ft->ft_masks[curcpu];
- else
- mask = ft->ft_masks[0];
+ /*
+ * flowtable_free_stale() calls w/o critical section, but
+ * with sched_bind(). Since pointer is stable throughout
+ * ft lifetime, it is safe, otherwise...
+ *
+ * CRITICAL_ASSERT(curthread);
+ */
- return (mask);
+ return (*(bitstr_t **)zpcpu_get(ft->ft_masks));
}
-static struct flentry **
-flowtable_entry(struct flowtable *ft, uint32_t hash)
+static struct flist *
+flowtable_list(struct flowtable *ft, uint32_t hash)
{
- struct flentry **fle;
- int index = (hash % ft->ft_size);
- if (ft->ft_flags & FL_PCPU) {
- KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
- fle = &ft->ft_table.pcpu[curcpu][index];
- } else {
- KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
- fle = &ft->ft_table.global[index];
- }
-
- return (fle);
+ CRITICAL_ASSERT(curthread);
+ return (zpcpu_get(ft->ft_table[hash % ft->ft_size]));
}
static int
-flow_stale(struct flowtable *ft, struct flentry *fle)
+flow_stale(struct flowtable *ft, struct flentry *fle, int maxidle)
{
- time_t idle_time;
- if ((fle->f_fhash == 0)
- || ((fle->f_rt->rt_flags & RTF_HOST) &&
- ((fle->f_rt->rt_flags & (RTF_UP))
- != (RTF_UP)))
- || (fle->f_rt->rt_ifp == NULL)
- || !RT_LINK_IS_UP(fle->f_rt->rt_ifp))
+ if (((fle->f_rt->rt_flags & RTF_HOST) &&
+ ((fle->f_rt->rt_flags & (RTF_UP)) != (RTF_UP))) ||
+ (fle->f_rt->rt_ifp == NULL) ||
+ !RT_LINK_IS_UP(fle->f_rt->rt_ifp) ||
+ (fle->f_lle->la_flags & LLE_VALID) == 0)
return (1);
- idle_time = time_uptime - fle->f_uptime;
+ if (time_uptime - fle->f_uptime > maxidle)
+ return (1);
- if ((fle->f_flags & FL_STALE) ||
- ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
- && (idle_time > ft->ft_udp_idle)) ||
- ((fle->f_flags & TH_FIN)
- && (idle_time > ft->ft_fin_wait_idle)) ||
- ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
- && (idle_time > ft->ft_syn_idle)) ||
- ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
- && (idle_time > ft->ft_tcp_idle)) ||
- ((fle->f_rt->rt_flags & RTF_UP) == 0 ||
- (fle->f_rt->rt_ifp == NULL)))
+#ifdef FLOWTABLE_HASH_ALL
+ if (fle->f_flags & FL_STALE)
return (1);
+#endif
return (0);
}
-static void
-flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
+static int
+flow_full(void)
{
- uint32_t *hashkey;
- int i, nwords;
+ int count, max;
- if (fle->f_flags & FL_IPV6) {
- nwords = 9;
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
- } else {
- nwords = 3;
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
- }
-
- for (i = 0; i < nwords; i++)
- hashkey[i] = key[i];
-}
+ count = uma_zone_get_cur(flow_zone);
+ max = uma_zone_get_max(flow_zone);
-static struct flentry *
-flow_alloc(struct flowtable *ft)
-{
- struct flentry *newfle;
- uma_zone_t zone;
-
- newfle = NULL;
- zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
-
- newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
- if (newfle != NULL)
- atomic_add_int(&ft->ft_count, 1);
- return (newfle);
+ return (count > (max - (max >> 3)));
}
-static void
-flow_free(struct flentry *fle, struct flowtable *ft)
+static int
+flow_matches(struct flentry *fle, uint32_t *key, int keylen, uint32_t fibnum)
{
- uma_zone_t zone;
+#ifdef FLOWTABLE_HASH_ALL
+ uint8_t proto;
- zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
- atomic_add_int(&ft->ft_count, -1);
- uma_zfree(zone, fle);
-}
+ proto = (fibnum >> 16) & 0xff;
+ fibnum &= 0xffff;
+#endif
-static int
-flow_full(struct flowtable *ft)
-{
- boolean_t full;
- uint32_t count;
-
- full = ft->ft_full;
- count = ft->ft_count;
+ CRITICAL_ASSERT(curthread);
- if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
- ft->ft_full = FALSE;
- else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
- ft->ft_full = TRUE;
-
- if (full && !ft->ft_full) {
- flowclean_freq = 4*hz;
- if ((ft->ft_flags & FL_HASH_ALL) == 0)
- ft->ft_udp_idle = ft->ft_fin_wait_idle =
- ft->ft_syn_idle = ft->ft_tcp_idle = 5;
- cv_broadcast(&flowclean_c_cv);
- } else if (!full && ft->ft_full) {
- flowclean_freq = 20*hz;
- if ((ft->ft_flags & FL_HASH_ALL) == 0)
- ft->ft_udp_idle = ft->ft_fin_wait_idle =
- ft->ft_syn_idle = ft->ft_tcp_idle = 30;
- }
+ /* Microoptimization for IPv4: don't use bcmp(). */
+ if (((keylen == sizeof(uint32_t) && (fle->f_key[0] != key[0])) ||
+ (bcmp(fle->f_key, key, keylen) == 0)) &&
+ fibnum == fle->f_fibnum &&
+#ifdef FLOWTABLE_HASH_ALL
+ proto == fle->f_proto &&
+#endif
+ (fle->f_rt->rt_flags & RTF_UP) &&
+ fle->f_rt->rt_ifp != NULL &&
+ (fle->f_lle->la_flags & LLE_VALID))
+ return (1);
- return (ft->ft_full);
+ return (0);
}
-static int
+static struct flentry *
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
- uint32_t fibnum, struct route *ro, uint16_t flags)
+ int keylen, uint32_t fibnum0)
{
- struct flentry *fle, *fletail, *newfle, **flep;
- struct flowtable_stats *fs = &ft->ft_stats[curcpu];
- int depth;
+#ifdef INET6
+ struct route_in6 sro6;
+#endif
+#ifdef INET
+ struct route sro;
+#endif
+ struct route *ro = NULL;
+ struct rtentry *rt;
+ struct lltable *lt = NULL;
+ struct llentry *lle;
+ struct sockaddr_storage *l3addr;
+ struct ifnet *ifp;
+ struct flist *flist;
+ struct flentry *fle, *iter;
bitstr_t *mask;
+ uint16_t fibnum = fibnum0;
+#ifdef FLOWTABLE_HASH_ALL
uint8_t proto;
- newfle = flow_alloc(ft);
- if (newfle == NULL)
- return (ENOMEM);
+ proto = (fibnum0 >> 16) & 0xff;
+ fibnum = fibnum0 & 0xffff;
+#endif
- newfle->f_flags |= (flags & FL_IPV6);
- proto = flags_to_proto(flags);
-
- FL_ENTRY_LOCK(ft, hash);
- mask = flowtable_mask(ft);
- flep = flowtable_entry(ft, hash);
- fletail = fle = *flep;
-
- if (fle == NULL) {
- bit_set(mask, FL_ENTRY_INDEX(ft, hash));
- *flep = fle = newfle;
- goto skip;
- }
-
- depth = 0;
- fs->ft_collisions++;
/*
- * find end of list and make sure that we were not
- * preempted by another thread handling this flow
+ * This bit of code ends up locking the
+ * same route 3 times (just like ip_output + ether_output)
+ * - at lookup
+ * - in rt_check when called by arpresolve
+ * - dropping the refcount for the rtentry
+ *
+ * This could be consolidated to one if we wrote a variant
+ * of arpresolve with an rt_check variant that expected to
+ * receive the route locked
*/
- while (fle != NULL) {
- if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
- /*
- * there was either a hash collision
- * or we lost a race to insert
- */
- FL_ENTRY_UNLOCK(ft, hash);
- flow_free(newfle, ft);
-
- if (flags & FL_OVERWRITE)
- goto skip;
- return (EEXIST);
- }
- /*
- * re-visit this double condition XXX
- */
- if (fletail->f_next != NULL)
- fletail = fle->f_next;
+#ifdef INET
+ if (ft == &V_ip4_ft) {
+ struct sockaddr_in *sin;
- depth++;
- fle = fle->f_next;
- }
+ ro = &sro;
+ bzero(&sro.ro_dst, sizeof(sro.ro_dst));
- if (depth > fs->ft_max_depth)
- fs->ft_max_depth = depth;
- fletail->f_next = newfle;
- fle = newfle;
-skip:
- flowtable_set_hashkey(fle, key);
-
- fle->f_proto = proto;
- fle->f_rt = ro->ro_rt;
- fle->f_lle = ro->ro_lle;
- fle->f_fhash = hash;
- fle->f_fibnum = fibnum;
- fle->f_uptime = time_uptime;
- FL_ENTRY_UNLOCK(ft, hash);
- return (0);
-}
-
-int
-kern_flowtable_insert(struct flowtable *ft,
- struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
- struct route *ro, uint32_t fibnum, int flags)
-{
- uint32_t key[9], hash;
-
- flags = (ft->ft_flags | flags | FL_OVERWRITE);
- hash = 0;
-
-#ifdef INET
- if (ssa->ss_family == AF_INET)
- hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
- (struct sockaddr_in *)dsa, key, flags);
+ sin = (struct sockaddr_in *)&sro.ro_dst;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr.s_addr = key[0];
+ }
#endif
#ifdef INET6
- if (ssa->ss_family == AF_INET6)
- hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
- (struct sockaddr_in6 *)dsa, key, flags);
-#endif
- if (ro->ro_rt == NULL || ro->ro_lle == NULL)
- return (EINVAL);
+ if (ft == &V_ip6_ft) {
+ struct sockaddr_in6 *sin6;
- FLDPRINTF(ft, FL_DEBUG,
- "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
- key[0], key[1], key[2], hash, fibnum, flags);
- return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
-}
+ ro = (struct route *)&sro6;
+ sin6 = &sro6.ro_dst;
-static int
-flowtable_key_equal(struct flentry *fle, uint32_t *key)
-{
- uint32_t *hashkey;
- int i, nwords;
-
- if (fle->f_flags & FL_IPV6) {
- nwords = 9;
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
- } else {
- nwords = 3;
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ bcopy(key, &sin6->sin6_addr, sizeof(struct in6_addr));
}
+#endif
- for (i = 0; i < nwords; i++)
- if (hashkey[i] != key[i])
- return (0);
+ ro->ro_rt = NULL;
+#ifdef RADIX_MPATH
+ rtalloc_mpath_fib(ro, hash, fibnum);
+#else
+ rtalloc_ign_fib(ro, 0, fibnum);
+#endif
+ if (ro->ro_rt == NULL)
+ return (NULL);
- return (1);
-}
+ rt = ro->ro_rt;
+ ifp = rt->rt_ifp;
-struct flentry *
-flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
-{
- struct flentry *fle = NULL;
+ if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
+ RTFREE(rt);
+ return (NULL);
+ }
#ifdef INET
- if (af == AF_INET)
- fle = flowtable_lookup_mbuf4(ft, m);
+ if (ft == &V_ip4_ft)
+ lt = LLTABLE(ifp);
#endif
#ifdef INET6
- if (af == AF_INET6)
- fle = flowtable_lookup_mbuf6(ft, m);
-#endif
- if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
- m->m_flags |= M_FLOWID;
- m->m_pkthdr.flowid = fle->f_fhash;
- }
- return (fle);
-}
-
-struct flentry *
-flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
- struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
-{
- uint32_t key[9], hash;
- struct flentry *fle;
- struct flowtable_stats *fs = &ft->ft_stats[curcpu];
- uint8_t proto = 0;
- int error = 0;
- struct rtentry *rt;
- struct llentry *lle;
- struct route sro, *ro;
- struct route_in6 sro6;
+ if (ft == &V_ip6_ft)
+ lt = LLTABLE6(ifp);
+#endif
- sro.ro_rt = sro6.ro_rt = NULL;
- sro.ro_lle = sro6.ro_lle = NULL;
- ro = NULL;
- hash = 0;
- flags |= ft->ft_flags;
- proto = flags_to_proto(flags);
-#ifdef INET
- if (ssa->ss_family == AF_INET) {
- struct sockaddr_in *ssin, *dsin;
+ if (rt->rt_flags & RTF_GATEWAY)
+ l3addr = (struct sockaddr_storage *)rt->rt_gateway;
+ else
+ l3addr = (struct sockaddr_storage *)&ro->ro_dst;
+ lle = llentry_alloc(ifp, lt, l3addr);
- ro = &sro;
- memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
- /*
- * The harvested source and destination addresses
- * may contain port information if the packet is
- * from a transport protocol (e.g. TCP/UDP). The
- * port field must be cleared before performing
- * a route lookup.
- */
- ((struct sockaddr_in *)&ro->ro_dst)->sin_port = 0;
- dsin = (struct sockaddr_in *)dsa;
- ssin = (struct sockaddr_in *)ssa;
- if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
- (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
- (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
- return (NULL);
+ if (lle == NULL) {
+ RTFREE(rt);
+ return (NULL);
+ }
- hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
+ /* Don't insert the entry if the ARP hasn't yet finished resolving. */
+ if ((lle->la_flags & LLE_VALID) == 0) {
+ RTFREE(rt);
+ LLE_FREE(lle);
+ FLOWSTAT_INC(ft, ft_fail_lle_invalid);
+ return (NULL);
}
-#endif
-#ifdef INET6
- if (ssa->ss_family == AF_INET6) {
- struct sockaddr_in6 *ssin6, *dsin6;
- ro = (struct route *)&sro6;
- memcpy(&sro6.ro_dst, dsa,
- sizeof(struct sockaddr_in6));
- ((struct sockaddr_in6 *)&ro->ro_dst)->sin6_port = 0;
- dsin6 = (struct sockaddr_in6 *)dsa;
- ssin6 = (struct sockaddr_in6 *)ssa;
+ fle = uma_zalloc(flow_zone, M_NOWAIT | M_ZERO);
+ if (fle == NULL) {
+ RTFREE(rt);
+ LLE_FREE(lle);
+ return (NULL);
+ }
- flags |= FL_IPV6;
- hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
- }
+ fle->f_hash = hash;
+ bcopy(key, &fle->f_key, keylen);
+ fle->f_rt = rt;
+ fle->f_lle = lle;
+ fle->f_fibnum = fibnum;
+ fle->f_uptime = time_uptime;
+#ifdef FLOWTABLE_HASH_ALL
+ fle->f_proto = proto;
+ fle->f_flags = fibnum0 >> 24;
#endif
- /*
- * Ports are zero and this isn't a transmit cache
- * - thus not a protocol for which we need to keep
- * state
- * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
- */
- if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
- return (NULL);
- fs->ft_lookups++;
- FL_ENTRY_LOCK(ft, hash);
- if ((fle = FL_ENTRY(ft, hash)) == NULL) {
- FL_ENTRY_UNLOCK(ft, hash);
- goto uncached;
+ critical_enter();
+ mask = flowtable_mask(ft);
+ flist = flowtable_list(ft, hash);
+
+ if (SLIST_EMPTY(flist)) {
+ bit_set(mask, (hash % ft->ft_size));
+ SLIST_INSERT_HEAD(flist, fle, f_next);
+ goto skip;
}
-keycheck:
- rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- lle = __DEVOLATILE(struct llentry *, fle->f_lle);
- if ((rt != NULL)
- && lle != NULL
- && fle->f_fhash == hash
- && flowtable_key_equal(fle, key)
- && (proto == fle->f_proto)
- && (fibnum == fle->f_fibnum)
- && (rt->rt_flags & RTF_UP)
- && (rt->rt_ifp != NULL)
- && (lle->la_flags & LLE_VALID)) {
- fs->ft_hits++;
- fle->f_uptime = time_uptime;
- fle->f_flags |= flags;
- FL_ENTRY_UNLOCK(ft, hash);
- return (fle);
- } else if (fle->f_next != NULL) {
- fle = fle->f_next;
- goto keycheck;
- }
- FL_ENTRY_UNLOCK(ft, hash);
-uncached:
- if (flags & FL_NOAUTO || flow_full(ft))
- return (NULL);
- fs->ft_misses++;
/*
- * This bit of code ends up locking the
- * same route 3 times (just like ip_output + ether_output)
- * - at lookup
- * - in rt_check when called by arpresolve
- * - dropping the refcount for the rtentry
- *
- * This could be consolidated to one if we wrote a variant
- * of arpresolve with an rt_check variant that expected to
- * receive the route locked
+ * find end of list and make sure that we were not
+ * preempted by another thread handling this flow
*/
-
-#ifdef INVARIANTS
- if ((ro->ro_dst.sa_family != AF_INET) &&
- (ro->ro_dst.sa_family != AF_INET6))
- panic("sa_family == %d\n", ro->ro_dst.sa_family);
+ SLIST_FOREACH(iter, flist, f_next) {
+ KASSERT(iter->f_hash % ft->ft_size == hash % ft->ft_size,
+ ("%s: wrong hash", __func__));
+ if (flow_matches(iter, key, keylen, fibnum)) {
+ /*
+ * We probably migrated to an other CPU after
+ * lookup in flowtable_lookup_common() failed.
+ * It appeared that this CPU already has flow
+ * entry.
+ */
+ iter->f_uptime = time_uptime;
+#ifdef FLOWTABLE_HASH_ALL
+ iter->f_flags |= fibnum >> 24;
#endif
+ critical_exit();
+ FLOWSTAT_INC(ft, ft_collisions);
+ uma_zfree(flow_zone, fle);
+ return (iter);
+ }
+ }
- ft->ft_rtalloc(ro, hash, fibnum);
- if (ro->ro_rt == NULL)
- error = ENETUNREACH;
- else {
- struct llentry *lle = NULL;
- struct sockaddr_storage *l3addr;
- struct rtentry *rt = ro->ro_rt;
- struct ifnet *ifp = rt->rt_ifp;
+ SLIST_INSERT_HEAD(flist, fle, f_next);
+skip:
+ critical_exit();
+ FLOWSTAT_INC(ft, ft_inserts);
- if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
- RTFREE(rt);
- ro->ro_rt = NULL;
- return (NULL);
- }
-#ifdef INET6
- if (ssa->ss_family == AF_INET6) {
- struct sockaddr_in6 *dsin6;
+ return (fle);
+}
- dsin6 = (struct sockaddr_in6 *)dsa;
- if (in6_localaddr(&dsin6->sin6_addr)) {
- RTFREE(rt);
- ro->ro_rt = NULL;
- return (NULL);
- }
+int
+flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
+{
+ struct flentry *fle;
- if (rt->rt_flags & RTF_GATEWAY)
- l3addr = (struct sockaddr_storage *)rt->rt_gateway;
-
- else
- l3addr = (struct sockaddr_storage *)&ro->ro_dst;
- lle = llentry_alloc(ifp, LLTABLE6(ifp), l3addr);
- }
-#endif
+ if (V_flowtable_enable == 0)
+ return (ENXIO);
+
+ switch (sa) {
#ifdef INET
- if (ssa->ss_family == AF_INET) {
- if (rt->rt_flags & RTF_GATEWAY)
- l3addr = (struct sockaddr_storage *)rt->rt_gateway;
- else
- l3addr = (struct sockaddr_storage *)&ro->ro_dst;
- lle = llentry_alloc(ifp, LLTABLE(ifp), l3addr);
- }
-
+ case AF_INET:
+ fle = flowtable_lookup_ipv4(m, ro);
+ break;
#endif
- ro->ro_lle = lle;
+#ifdef INET6
+ case AF_INET6:
+ fle = flowtable_lookup_ipv6(m, ro);
+ break;
+#endif
+ default:
+ panic("%s: sa %d", __func__, sa);
+ }
- if (lle == NULL) {
- RTFREE(rt);
- ro->ro_rt = NULL;
- return (NULL);
- }
- error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
+ if (fle == NULL)
+ return (EHOSTUNREACH);
- if (error) {
- RTFREE(rt);
- LLE_FREE(lle);
- ro->ro_rt = NULL;
- ro->ro_lle = NULL;
- }
- }
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) {
+ M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+ m->m_pkthdr.flowid = fle->f_hash;
+ }
- return ((error) ? NULL : fle);
+ ro->ro_rt = fle->f_rt;
+ ro->ro_lle = fle->f_lle;
+ ro->ro_flags |= RT_NORTREF;
+
+ return (0);
}
-/*
- * used by the bit_alloc macro
- */
-#define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
-
-struct flowtable *
-flowtable_alloc(char *name, int nentry, int flags)
+static struct flentry *
+flowtable_lookup_common(struct flowtable *ft, uint32_t *key, int keylen,
+ uint32_t fibnum)
{
- struct flowtable *ft, *fttail;
- int i;
+ struct flist *flist;
+ struct flentry *fle;
+ uint32_t hash;
- if (V_flow_hashjitter == 0)
- V_flow_hashjitter = arc4random();
+ FLOWSTAT_INC(ft, ft_lookups);
- KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
+ hash = jenkins_hash32(key, keylen / sizeof(uint32_t), flow_hashjitter);
- ft = malloc(sizeof(struct flowtable),
- M_RTABLE, M_WAITOK | M_ZERO);
-
- ft->ft_name = name;
- ft->ft_flags = flags;
- ft->ft_size = nentry;
-#ifdef RADIX_MPATH
- ft->ft_rtalloc = rtalloc_mpath_fib;
-#else
- ft->ft_rtalloc = rtalloc_ign_wrapper;
+ critical_enter();
+ flist = flowtable_list(ft, hash);
+ SLIST_FOREACH(fle, flist, f_next) {
+ KASSERT(fle->f_hash % ft->ft_size == hash % ft->ft_size,
+ ("%s: wrong hash", __func__));
+ if (flow_matches(fle, key, keylen, fibnum)) {
+ fle->f_uptime = time_uptime;
+#ifdef FLOWTABLE_HASH_ALL
+ fle->f_flags |= fibnum >> 24;
#endif
- if (flags & FL_PCPU) {
- ft->ft_lock = flowtable_pcpu_lock;
- ft->ft_unlock = flowtable_pcpu_unlock;
-
- for (i = 0; i <= mp_maxid; i++) {
- ft->ft_table.pcpu[i] =
- malloc(nentry*sizeof(struct flentry *),
- M_RTABLE, M_WAITOK | M_ZERO);
- ft->ft_masks[i] = bit_alloc(nentry);
+ critical_exit();
+ FLOWSTAT_INC(ft, ft_hits);
+ return (fle);
}
- } else {
- ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
- (fls(mp_maxid + 1) << 1));
-
- ft->ft_lock = flowtable_global_lock;
- ft->ft_unlock = flowtable_global_unlock;
- ft->ft_table.global =
- malloc(nentry*sizeof(struct flentry *),
- M_RTABLE, M_WAITOK | M_ZERO);
- ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
- M_RTABLE, M_WAITOK | M_ZERO);
- for (i = 0; i < ft->ft_lock_count; i++)
- mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
-
- ft->ft_masks[0] = bit_alloc(nentry);
}
- ft->ft_tmpmask = bit_alloc(nentry);
+ critical_exit();
- /*
- * In the local transmit case the table truly is
- * just a cache - so everything is eligible for
- * replacement after 5s of non-use
- */
- if (flags & FL_HASH_ALL) {
- ft->ft_udp_idle = V_flowtable_udp_expire;
- ft->ft_syn_idle = V_flowtable_syn_expire;
- ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
- ft->ft_tcp_idle = V_flowtable_fin_wait_expire;
- } else {
- ft->ft_udp_idle = ft->ft_fin_wait_idle =
- ft->ft_syn_idle = ft->ft_tcp_idle = 30;
-
- }
+ FLOWSTAT_INC(ft, ft_misses);
- /*
- * hook in to the cleaner list
- */
- if (V_flow_list_head == NULL)
- V_flow_list_head = ft;
- else {
- fttail = V_flow_list_head;
- while (fttail->ft_next != NULL)
- fttail = fttail->ft_next;
- fttail->ft_next = ft;
- }
-
- return (ft);
+ return (flowtable_insert(ft, hash, key, keylen, fibnum));
}
/*
- * The rest of the code is devoted to garbage collection of expired entries.
- * It is a new additon made necessary by the switch to dynamically allocating
- * flow tables.
- *
+ * used by the bit_alloc macro
*/
+#define calloc(count, size) malloc((count)*(size), M_FTABLE, M_WAITOK | M_ZERO)
static void
-fle_free(struct flentry *fle, struct flowtable *ft)
+flowtable_alloc(struct flowtable *ft)
{
- struct rtentry *rt;
- struct llentry *lle;
- rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- lle = __DEVOLATILE(struct llentry *, fle->f_lle);
- if (rt != NULL)
- RTFREE(rt);
- if (lle != NULL)
- LLE_FREE(lle);
- flow_free(fle, ft);
+ ft->ft_table = malloc(ft->ft_size * sizeof(struct flist),
+ M_FTABLE, M_WAITOK);
+ for (int i = 0; i < ft->ft_size; i++)
+ ft->ft_table[i] = uma_zalloc(pcpu_zone_ptr, M_WAITOK | M_ZERO);
+
+ ft->ft_masks = uma_zalloc(pcpu_zone_ptr, M_WAITOK);
+ for (int i = 0; i < mp_ncpus; i++) {
+ bitstr_t **b;
+
+ b = zpcpu_get_cpu(ft->ft_masks, i);
+ *b = bit_alloc(ft->ft_size);
+ }
+ ft->ft_tmpmask = bit_alloc(ft->ft_size);
}
+#undef calloc
static void
-flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
+flowtable_free_stale(struct flowtable *ft, struct rtentry *rt, int maxidle)
{
- int curbit = 0, count;
- struct flentry *fle, **flehead, *fleprev;
- struct flentry *flefreehead, *flefreetail, *fletmp;
+ struct flist *flist, freelist;
+ struct flentry *fle, *fle1, *fleprev;
bitstr_t *mask, *tmpmask;
- struct flowtable_stats *fs = &ft->ft_stats[curcpu];
+ int curbit, tmpsize;
- flefreehead = flefreetail = NULL;
+ SLIST_INIT(&freelist);
mask = flowtable_mask(ft);
tmpmask = ft->ft_tmpmask;
+ tmpsize = ft->ft_size;
memcpy(tmpmask, mask, ft->ft_size/8);
+ curbit = 0;
+ fleprev = NULL; /* pacify gcc */
/*
* XXX Note to self, bit_ffs operates at the byte level
* and thus adds gratuitous overhead
@@ -1425,129 +787,96 @@
break;
}
- FL_ENTRY_LOCK(ft, curbit);
- flehead = flowtable_entry(ft, curbit);
- fle = fleprev = *flehead;
+ FLOWSTAT_INC(ft, ft_free_checks);
- fs->ft_free_checks++;
+ critical_enter();
+ flist = flowtable_list(ft, curbit);
#ifdef DIAGNOSTIC
- if (fle == NULL && curbit > 0) {
+ if (SLIST_EMPTY(flist) && curbit > 0) {
log(LOG_ALERT,
"warning bit=%d set, but no fle found\n",
curbit);
}
-#endif
- while (fle != NULL) {
- if (rt != NULL) {
- if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
- fleprev = fle;
- fle = fle->f_next;
- continue;
- }
- } else if (!flow_stale(ft, fle)) {
+#endif
+ SLIST_FOREACH_SAFE(fle, flist, f_next, fle1) {
+ if (rt != NULL && fle->f_rt != rt) {
fleprev = fle;
- fle = fle->f_next;
continue;
}
- /*
- * delete head of the list
- */
- if (fleprev == *flehead) {
- fletmp = fleprev;
- if (fle == fleprev) {
- fleprev = *flehead = fle->f_next;
- } else
- fleprev = *flehead = fle;
- fle = fle->f_next;
- } else {
- /*
- * don't advance fleprev
- */
- fletmp = fle;
- fleprev->f_next = fle->f_next;
- fle = fleprev->f_next;
+ if (!flow_stale(ft, fle, maxidle)) {
+ fleprev = fle;
+ continue;
}
- if (flefreehead == NULL)
- flefreehead = flefreetail = fletmp;
- else {
- flefreetail->f_next = fletmp;
- flefreetail = fletmp;
- }
- fletmp->f_next = NULL;
+ if (fle == SLIST_FIRST(flist))
+ SLIST_REMOVE_HEAD(flist, f_next);
+ else
+ SLIST_REMOVE_AFTER(fleprev, f_next);
+ SLIST_INSERT_HEAD(&freelist, fle, f_next);
}
- if (*flehead == NULL)
+ if (SLIST_EMPTY(flist))
bit_clear(mask, curbit);
- FL_ENTRY_UNLOCK(ft, curbit);
+ critical_exit();
+
bit_clear(tmpmask, curbit);
- bit_ffs(tmpmask, ft->ft_size, &curbit);
+ tmpmask += (curbit / 8);
+ tmpsize -= (curbit / 8) * 8;
+ bit_ffs(tmpmask, tmpsize, &curbit);
}
- count = 0;
- while ((fle = flefreehead) != NULL) {
- flefreehead = fle->f_next;
- count++;
- fs->ft_frees++;
- fle_free(fle, ft);
+
+ SLIST_FOREACH_SAFE(fle, &freelist, f_next, fle1) {
+ FLOWSTAT_INC(ft, ft_frees);
+ if (fle->f_rt != NULL)
+ RTFREE(fle->f_rt);
+ if (fle->f_lle != NULL)
+ LLE_FREE(fle->f_lle);
+ uma_zfree(flow_zone, fle);
}
- if (V_flowtable_debug && count)
- log(LOG_DEBUG, "freed %d flow entries\n", count);
}
-void
-flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
+static void
+flowtable_clean_vnet(struct flowtable *ft, struct rtentry *rt, int maxidle)
{
int i;
- if (ft->ft_flags & FL_PCPU) {
- CPU_FOREACH(i) {
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_bind(curthread, i);
- thread_unlock(curthread);
- }
+ CPU_FOREACH(i) {
+ if (smp_started == 1) {
+ thread_lock(curthread);
+ sched_bind(curthread, i);
+ thread_unlock(curthread);
+ }
- flowtable_free_stale(ft, rt);
+ flowtable_free_stale(ft, rt, maxidle);
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- }
+ if (smp_started == 1) {
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
}
- } else {
- flowtable_free_stale(ft, rt);
}
}
-static void
-flowtable_clean_vnet(void)
+void
+flowtable_route_flush(sa_family_t sa, struct rtentry *rt)
{
struct flowtable *ft;
- int i;
- ft = V_flow_list_head;
- while (ft != NULL) {
- if (ft->ft_flags & FL_PCPU) {
- CPU_FOREACH(i) {
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_bind(curthread, i);
- thread_unlock(curthread);
- }
+ switch (sa) {
+#ifdef INET
+ case AF_INET:
+ ft = &V_ip4_ft;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ft = &V_ip6_ft;
+ break;
+#endif
+ default:
+ panic("%s: sa %d", __func__, sa);
+ }
- flowtable_free_stale(ft, NULL);
-
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- }
- }
- } else {
- flowtable_free_stale(ft, NULL);
- }
- ft = ft->ft_next;
- }
+ flowtable_clean_vnet(ft, rt, 0);
}
static void
@@ -1560,18 +889,33 @@
log(LOG_INFO, "flowtable cleaner started\n");
td = curthread;
while (1) {
+ uint32_t flowclean_freq, maxidle;
+
+ /*
+ * The maximum idle time, as well as frequency are arbitrary.
+ */
+ if (flow_full())
+ maxidle = 5;
+ else
+ maxidle = 30;
+
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- flowtable_clean_vnet();
+#ifdef INET
+ flowtable_clean_vnet(&V_ip4_ft, NULL, maxidle);
+#endif
+#ifdef INET6
+ flowtable_clean_vnet(&V_ip6_ft, NULL, maxidle);
+#endif
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
- /*
- * The 10 second interval between cleaning checks
- * is arbitrary
- */
+ if (flow_full())
+ flowclean_freq = 4*hz;
+ else
+ flowclean_freq = 20*hz;
mtx_lock(&flowclean_lock);
thread_lock(td);
sched_prio(td, PPAUSE);
@@ -1604,91 +948,106 @@
};
SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
-static void
-flowtable_init_vnet(const void *unused __unused)
+static int
+flowtable_get_size(char *name)
{
+ int size;
- V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
- V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
- NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
- V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
- NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
- uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
- uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
- V_flowtable_ready = 1;
+ if (TUNABLE_INT_FETCH(name, &size)) {
+ if (size < 256)
+ size = 256;
+ if (!powerof2(size)) {
+ printf("%s must be power of 2\n", name);
+ size = 2048;
+ }
+ } else {
+ /*
+ * round up to the next power of 2
+ */
+ size = 1 << fls((1024 + maxusers * 64) - 1);
+ }
+
+ return (size);
}
-VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
- flowtable_init_vnet, NULL);
static void
flowtable_init(const void *unused __unused)
{
+ flow_hashjitter = arc4random();
+
+ flow_zone = uma_zcreate("flows", sizeof(struct flentry),
+ NULL, NULL, NULL, NULL, (64-1), UMA_ZONE_MAXBUCKET);
+ uma_zone_set_max(flow_zone, 1024 + maxusers * 64 * mp_ncpus);
+
cv_init(&flowclean_c_cv, "c_flowcleanwait");
cv_init(&flowclean_f_cv, "f_flowcleanwait");
mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
EVENTHANDLER_PRI_ANY);
- flowclean_freq = 20*hz;
}
-SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST,
+SYSINIT(flowtable_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
flowtable_init, NULL);
+#ifdef INET
+static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip4, CTLFLAG_RD, NULL,
+ "Flowtable for IPv4");
-#ifdef VIMAGE
+static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip4_ftstat);
+VNET_PCPUSTAT_SYSINIT(ip4_ftstat);
+VNET_PCPUSTAT_SYSUNINIT(ip4_ftstat);
+SYSCTL_VNET_PCPUSTAT(_net_flowtable_ip4, OID_AUTO, stat, struct flowtable_stat,
+ ip4_ftstat, "Flowtable statistics for IPv4 "
+ "(struct flowtable_stat, net/flowtable.h)");
+
static void
-flowtable_uninit(const void *unused __unused)
+flowtable_init_vnet_v4(const void *unused __unused)
{
- V_flowtable_ready = 0;
- uma_zdestroy(V_flow_ipv4_zone);
- uma_zdestroy(V_flow_ipv6_zone);
+ V_ip4_ft.ft_size = flowtable_get_size("net.flowtable.ip4.size");
+ V_ip4_ft.ft_stat = VNET(ip4_ftstat);
+ flowtable_alloc(&V_ip4_ft);
}
+VNET_SYSINIT(ft_vnet_v4, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ flowtable_init_vnet_v4, NULL);
+#endif /* INET */
-VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
- flowtable_uninit, NULL);
-#endif
+#ifdef INET6
+static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip6, CTLFLAG_RD, NULL,
+ "Flowtable for IPv6");
-#ifdef DDB
-static uint32_t *
-flowtable_get_hashkey(struct flentry *fle)
+static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip6_ftstat);
+VNET_PCPUSTAT_SYSINIT(ip6_ftstat);
+VNET_PCPUSTAT_SYSUNINIT(ip6_ftstat);
+SYSCTL_VNET_PCPUSTAT(_net_flowtable_ip6, OID_AUTO, stat, struct flowtable_stat,
+ ip6_ftstat, "Flowtable statistics for IPv6 "
+ "(struct flowtable_stat, net/flowtable.h)");
+
+static void
+flowtable_init_vnet_v6(const void *unused __unused)
{
- uint32_t *hashkey;
- if (fle->f_flags & FL_IPV6)
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
- else
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
-
- return (hashkey);
+ V_ip6_ft.ft_size = flowtable_get_size("net.flowtable.ip6.size");
+ V_ip6_ft.ft_stat = VNET(ip6_ftstat);
+ flowtable_alloc(&V_ip6_ft);
}
+VNET_SYSINIT(flowtable_init_vnet_v6, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ flowtable_init_vnet_v6, NULL);
+#endif /* INET6 */
+#ifdef DDB
static bitstr_t *
flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
{
- bitstr_t *mask;
- if (ft->ft_flags & FL_PCPU)
- mask = ft->ft_masks[cpuid];
- else
- mask = ft->ft_masks[0];
-
- return (mask);
+ return (zpcpu_get_cpu(*ft->ft_masks, cpuid));
}
-static struct flentry **
-flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
+static struct flist *
+flowtable_list_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
{
- struct flentry **fle;
- int index = (hash % ft->ft_size);
- if (ft->ft_flags & FL_PCPU) {
- fle = &ft->ft_table.pcpu[cpuid][index];
- } else {
- fle = &ft->ft_table.global[index];
- }
-
- return (fle);
+ return (zpcpu_get_cpu(&ft->ft_table[hash % ft->ft_size], cpuid));
}
static void
@@ -1696,40 +1055,58 @@
{
int idle_time;
int rt_valid, ifp_valid;
- uint16_t sport, dport;
- uint32_t *hashkey;
- char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
volatile struct rtentry *rt;
struct ifnet *ifp = NULL;
+ uint32_t *hashkey = fle->f_key;
idle_time = (int)(time_uptime - fle->f_uptime);
rt = fle->f_rt;
rt_valid = rt != NULL;
- if (rt_valid)
+ if (rt_valid)
ifp = rt->rt_ifp;
ifp_valid = ifp != NULL;
- hashkey = flowtable_get_hashkey(fle);
- if (fle->f_flags & FL_IPV6)
- goto skipaddr;
- inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
- if (ft->ft_flags & FL_HASH_ALL) {
- inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);
- sport = ntohs(((uint16_t *)hashkey)[0]);
- dport = ntohs(((uint16_t *)hashkey)[1]);
- db_printf("%s:%d->%s:%d",
- saddr, sport, daddr,
- dport);
- } else
+#ifdef INET
+ if (ft == &V_ip4_ft) {
+ char daddr[4*sizeof "123"];
+#ifdef FLOWTABLE_HASH_ALL
+ char saddr[4*sizeof "123"];
+ uint16_t sport, dport;
+#endif
+
+ inet_ntoa_r(*(struct in_addr *) &hashkey[0], daddr);
+#ifdef FLOWTABLE_HASH_ALL
+ inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);
+ dport = ntohs((uint16_t)(hashkey[2] >> 16));
+ sport = ntohs((uint16_t)(hashkey[2] & 0xffff));
+ db_printf("%s:%d->%s:%d", saddr, sport, daddr, dport);
+#else
db_printf("%s ", daddr);
-
-skipaddr:
+#endif
+ }
+#endif /* INET */
+#ifdef INET6
+ if (ft == &V_ip6_ft) {
+#ifdef FLOWTABLE_HASH_ALL
+ db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
+ hashkey[0], hashkey[1], hashkey[2],
+ hashkey[3], hashkey[4], hashkey[5],
+ hashkey[6], hashkey[7], hashkey[8]);
+#else
+ db_printf("\n\tkey=%08x:%08x:%08x ",
+ hashkey[0], hashkey[1], hashkey[2]);
+#endif
+ }
+#endif /* INET6 */
+
+ db_printf("hash=%08x idle_time=%03d"
+ "\n\tfibnum=%02d rt=%p",
+ fle->f_hash, idle_time, fle->f_fibnum, fle->f_rt);
+
+#ifdef FLOWTABLE_HASH_ALL
if (fle->f_flags & FL_STALE)
db_printf(" FL_STALE ");
- if (fle->f_flags & FL_TCP)
- db_printf(" FL_TCP ");
- if (fle->f_flags & FL_UDP)
- db_printf(" FL_UDP ");
+#endif
if (rt_valid) {
if (rt->rt_flags & RTF_UP)
db_printf(" RTF_UP ");
@@ -1738,21 +1115,10 @@
if (ifp->if_flags & IFF_LOOPBACK)
db_printf(" IFF_LOOPBACK ");
if (ifp->if_flags & IFF_UP)
- db_printf(" IFF_UP ");
+ db_printf(" IFF_UP ");
if (ifp->if_flags & IFF_POINTOPOINT)
- db_printf(" IFF_POINTOPOINT ");
+ db_printf(" IFF_POINTOPOINT ");
}
- if (fle->f_flags & FL_IPV6)
- db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
- hashkey[0], hashkey[1], hashkey[2],
- hashkey[3], hashkey[4], hashkey[5],
- hashkey[6], hashkey[7], hashkey[8]);
- else
- db_printf("\n\tkey=%08x:%08x:%08x ",
- hashkey[0], hashkey[1], hashkey[2]);
- db_printf("hash=%08x idle_time=%03d"
- "\n\tfibnum=%02d rt=%p",
- fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
db_printf("\n");
}
@@ -1760,7 +1126,6 @@
flowtable_show(struct flowtable *ft, int cpuid)
{
int curbit = 0;
- struct flentry *fle, **flehead;
bitstr_t *mask, *tmpmask;
if (cpuid != -1)
@@ -1774,6 +1139,9 @@
*/
bit_ffs(tmpmask, ft->ft_size, &curbit);
while (curbit != -1) {
+ struct flist *flist;
+ struct flentry *fle;
+
if (curbit >= ft->ft_size || curbit < -1) {
db_printf("warning: bad curbit value %d \n",
curbit);
@@ -1780,14 +1148,10 @@
break;
}
- flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
- fle = *flehead;
+ flist = flowtable_list_pcpu(ft, curbit, cpuid);
- while (fle != NULL) {
+ SLIST_FOREACH(fle, flist, f_next)
flow_show(ft, fle);
- fle = fle->f_next;
- continue;
- }
bit_clear(tmpmask, curbit);
bit_ffs(tmpmask, ft->ft_size, &curbit);
}
@@ -1794,23 +1158,13 @@
}
static void
-flowtable_show_vnet(void)
+flowtable_show_vnet(struct flowtable *ft)
{
- struct flowtable *ft;
+
int i;
- ft = V_flow_list_head;
- while (ft != NULL) {
- printf("name: %s\n", ft->ft_name);
- if (ft->ft_flags & FL_PCPU) {
- CPU_FOREACH(i) {
- flowtable_show(ft, i);
- }
- } else {
- flowtable_show(ft, -1);
- }
- ft = ft->ft_next;
- }
+ CPU_FOREACH(i)
+ flowtable_show(ft, i);
}
DB_SHOW_COMMAND(flowtables, db_show_flowtables)
@@ -1822,7 +1176,14 @@
#ifdef VIMAGE
db_printf("vnet %p\n", vnet_iter);
#endif
- flowtable_show_vnet();
+#ifdef INET
+ printf("IPv4:\n");
+ flowtable_show_vnet(&V_ip4_ft);
+#endif
+#ifdef INET6
+ printf("IPv6:\n");
+ flowtable_show_vnet(&V_ip6_ft);
+#endif
CURVNET_RESTORE();
}
}
Modified: trunk/sys/net/flowtable.h
===================================================================
--- trunk/sys/net/flowtable.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/flowtable.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,84 +1,57 @@
/* $MidnightBSD$ */
-/**************************************************************************
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius at FreeBSD.org>
+ * Copyright (c) 2008-2010, BitGravity Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the BitGravity Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/flowtable.h 262743 2014-03-04 15:14:47Z glebius $
+ *
+ */
-Copyright (c) 2008-2010, BitGravity Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the BitGravity Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-$FreeBSD: stable/9/sys/net/flowtable.h 208171 2010-05-16 21:48:39Z kmacy $
-
-***************************************************************************/
-
#ifndef _NET_FLOWTABLE_H_
#define _NET_FLOWTABLE_H_
+struct flowtable_stat {
+ uint64_t ft_collisions;
+ uint64_t ft_misses;
+ uint64_t ft_free_checks;
+ uint64_t ft_frees;
+ uint64_t ft_hits;
+ uint64_t ft_lookups;
+ uint64_t ft_fail_lle_invalid;
+ uint64_t ft_inserts;
+};
+
#ifdef _KERNEL
-#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
-#define FL_PCPU (1<<1) /* pcpu cache */
-#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
-#define FL_IPV6 (1<<9)
-
-#define FL_TCP (1<<11)
-#define FL_SCTP (1<<12)
-#define FL_UDP (1<<13)
-#define FL_DEBUG (1<<14)
-#define FL_DEBUG_ALL (1<<15)
-
-struct flowtable;
-struct flentry;
-struct route;
-struct route_in6;
-
-VNET_DECLARE(struct flowtable *, ip_ft);
-#define V_ip_ft VNET(ip_ft)
-
-VNET_DECLARE(struct flowtable *, ip6_ft);
-#define V_ip6_ft VNET(ip6_ft)
-
-struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
-
/*
- * Given a flow table, look up the L3 and L2 information and
- * return it in the route.
- *
+ * Given a flow table, look up the L3 and L2 information
+ * and return it in the route.
*/
-struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
+int flowtable_lookup(sa_family_t, struct mbuf *, struct route *);
+void flowtable_route_flush(sa_family_t, struct rtentry *);
-struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
- struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
-
-int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
- struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
-
-void flow_invalidate(struct flentry *fl);
-void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
-
-void flow_to_route(struct flentry *fl, struct route *ro);
-
-void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
-
-
#endif /* _KERNEL */
-#endif
+#endif /* !_NET_FLOWTABLE_H_ */
Modified: trunk/sys/net/ieee8023ad_lacp.c
===================================================================
--- trunk/sys/net/ieee8023ad_lacp.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ieee8023ad_lacp.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/ieee8023ad_lacp.c 237669 2012-06-27 22:06:42Z thompsa $");
+__FBSDID("$FreeBSD: stable/10/sys/net/ieee8023ad_lacp.c 313039 2017-02-01 04:54:23Z rpokala $");
#include <sys/param.h>
#include <sys/callout.h>
@@ -188,30 +188,37 @@
static void lacp_dprintf(const struct lacp_port *, const char *, ...)
__attribute__((__format__(__printf__, 2, 3)));
-static int lacp_debug = 0;
-SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN,
- &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)");
-TUNABLE_INT("net.lacp_debug", &lacp_debug);
+static VNET_DEFINE(int, lacp_debug);
+#define V_lacp_debug VNET(lacp_debug)
+SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad");
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
+ &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
-#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; }
-#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); }
+static VNET_DEFINE(int, lacp_default_strict_mode) = 1;
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN,
+ &VNET_NAME(lacp_default_strict_mode), 0,
+ "LACP strict protocol compliance default");
+#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; }
+#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); }
+#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; }
+
/*
* partner administration variables.
* XXX should be configurable.
*/
-static const struct lacp_peerinfo lacp_partner_admin = {
+static const struct lacp_peerinfo lacp_partner_admin_optimistic = {
.lip_systemid = { .lsi_prio = 0xffff },
.lip_portid = { .lpi_prio = 0xffff },
-#if 1
- /* optimistic */
.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
-#else
- /* pessimistic */
+};
+
+static const struct lacp_peerinfo lacp_partner_admin_strict = {
+ .lip_systemid = { .lsi_prio = 0xffff },
+ .lip_portid = { .lpi_prio = 0xffff },
.lip_state = 0,
-#endif
};
static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
@@ -297,11 +304,16 @@
goto bad;
}
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu receive\n");
lacp_dump_lacpdu(du);
}
+ if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) {
+ LACP_TPRINTF((lp, "Dropping RX PDU\n"));
+ goto bad;
+ }
+
LACP_LOCK(lsc);
lacp_sm_rx(lp, du);
LACP_UNLOCK(lsc);
@@ -349,7 +361,7 @@
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -377,7 +389,7 @@
sizeof(du->ldu_collector));
du->ldu_collector.lci_maxdelay = 0;
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu transmit\n");
lacp_dump_lacpdu(du);
}
@@ -403,7 +415,7 @@
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -489,6 +501,7 @@
if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
continue;
+ CURVNET_SET(lp->lp_ifp->if_vnet);
lacp_run_timers(lp);
lacp_select(lp);
@@ -495,6 +508,7 @@
lacp_sm_mux(lp);
lacp_sm_tx(lp);
lacp_sm_ptx_tx_schedule(lp);
+ CURVNET_RESTORE();
}
callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
}
@@ -510,9 +524,6 @@
struct ifmultiaddr *rifma = NULL;
int error;
- boolean_t active = TRUE; /* XXX should be configurable */
- boolean_t fast = FALSE; /* XXX should be configurable */
-
bzero((char *)&sdl, sizeof(sdl));
sdl.sdl_len = sizeof(sdl);
sdl.sdl_family = AF_LINK;
@@ -544,9 +555,7 @@
lacp_fill_actorinfo(lp, &lp->lp_actor);
lacp_fill_markerinfo(lp, &lp->lp_marker);
- lp->lp_state =
- (active ? LACP_STATE_ACTIVITY : 0) |
- (fast ? LACP_STATE_TIMEOUT : 0);
+ lp->lp_state = LACP_STATE_ACTIVITY;
lp->lp_aggregator = NULL;
lacp_sm_rx_set_expired(lp);
LACP_UNLOCK(lsc);
@@ -571,12 +580,13 @@
lacp_disable_distributing(lp);
lacp_unselect(lp);
+ LIST_REMOVE(lp, lp_next);
+ LACP_UNLOCK(lsc);
+
/* The address may have already been removed by if_purgemaddrs() */
if (!lgp->lp_detaching)
if_delmulti_ifma(lp->lp_ifma);
- LIST_REMOVE(lp, lp_next);
- LACP_UNLOCK(lsc);
free(lp, M_DEVBUF);
}
@@ -585,10 +595,20 @@
{
struct lacp_opreq *req = (struct lacp_opreq *)data;
struct lacp_softc *lsc = LACP_SOFTC(sc);
- struct lacp_aggregator *la = lsc->lsc_active_aggregator;
+ struct lacp_aggregator *la;
+ bzero(req, sizeof(struct lacp_opreq));
+
+ /*
+ * If the LACP softc is NULL, return with the opreq structure full of
+ * zeros. It is normal for the softc to be NULL while the lagg is
+ * being destroyed.
+ */
+ if (NULL == lsc)
+ return;
+
+ la = lsc->lsc_active_aggregator;
LACP_LOCK(lsc);
- bzero(req, sizeof(struct lacp_opreq));
if (la != NULL) {
req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio);
memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac,
@@ -654,6 +674,7 @@
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -673,6 +694,7 @@
TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
la->la_nports--;
+ sc->sc_active = la->la_nports;
if (lsc->lsc_active_aggregator == la) {
lacp_suppress_distributing(lsc, la);
@@ -689,6 +711,7 @@
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -705,6 +728,7 @@
KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
la->la_nports++;
+ sc->sc_active = la->la_nports;
lp->lp_state |= LACP_STATE_DISTRIBUTING;
@@ -723,20 +747,19 @@
LACP_LOCK_ASSERT(lsc);
+ CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet);
LACP_TRACE(NULL);
+ CURVNET_RESTORE();
lsc->lsc_suppress_distributing = FALSE;
}
-int
+void
lacp_attach(struct lagg_softc *sc)
{
struct lacp_softc *lsc;
- lsc = malloc(sizeof(struct lacp_softc),
- M_DEVBUF, M_NOWAIT|M_ZERO);
- if (lsc == NULL)
- return (ENOMEM);
+ lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO);
sc->sc_psc = (caddr_t)lsc;
lsc->lsc_softc = sc;
@@ -743,6 +766,7 @@
lsc->lsc_hashkey = arc4random();
lsc->lsc_active_aggregator = NULL;
+ lsc->lsc_strict_mode = VNET(lacp_default_strict_mode);
LACP_LOCK_INIT(lsc);
TAILQ_INIT(&lsc->lsc_aggregators);
LIST_INIT(&lsc->lsc_ports);
@@ -753,14 +777,12 @@
/* if the lagg is already up then do the same */
if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
lacp_init(sc);
-
- return (0);
}
int
-lacp_detach(struct lagg_softc *sc)
+lacp_detach(void *psc)
{
- struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_softc *lsc = (struct lacp_softc *)psc;
KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
("aggregators still active"));
@@ -767,7 +789,6 @@
KASSERT(lsc->lsc_active_aggregator == NULL,
("aggregator still attached"));
- sc->sc_psc = NULL;
callout_drain(&lsc->lsc_transit_callout);
callout_drain(&lsc->lsc_callout);
@@ -816,8 +837,9 @@
return (NULL);
}
- if (sc->use_flowid && (m->m_flags & M_FLOWID))
- hash = m->m_pkthdr.flowid;
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
hash %= pm->pm_count;
@@ -909,7 +931,6 @@
static void
lacp_select_active_aggregator(struct lacp_softc *lsc)
{
- struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_aggregator *best_la = NULL;
uint64_t best_speed = 0;
@@ -961,7 +982,6 @@
lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
if (lsc->lsc_active_aggregator != best_la) {
- sc->sc_ifp->if_baudrate = best_speed;
lsc->lsc_active_aggregator = best_la;
lacp_update_portmap(lsc);
if (best_la) {
@@ -977,9 +997,11 @@
static void
lacp_update_portmap(struct lacp_softc *lsc)
{
+ struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_portmap *p;
struct lacp_port *lp;
+ uint64_t speed;
u_int newmap;
int i;
@@ -986,6 +1008,7 @@
newmap = lsc->lsc_activemap == 0 ? 1 : 0;
p = &lsc->lsc_pmap[newmap];
la = lsc->lsc_active_aggregator;
+ speed = 0;
bzero(p, sizeof(struct lacp_portmap));
if (la != NULL && la->la_nports > 0) {
@@ -994,7 +1017,9 @@
TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
p->pm_map[i++] = lp;
KASSERT(i == p->pm_count, ("Invalid port count"));
+ speed = lacp_aggregator_bandwidth(la);
}
+ sc->sc_ifp->if_baudrate = speed;
/* switch the active portmap over */
atomic_store_rel_int(&lsc->lsc_activemap, newmap);
@@ -1029,8 +1054,87 @@
KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
- /* bit 0..4: IFM_SUBTYPE */
- key = subtype;
+ /* bit 0..4: IFM_SUBTYPE modulo speed */
+ switch (subtype) {
+ case IFM_10_T:
+ case IFM_10_2:
+ case IFM_10_5:
+ case IFM_10_STP:
+ case IFM_10_FL:
+ key = IFM_10_T;
+ break;
+ case IFM_100_TX:
+ case IFM_100_FX:
+ case IFM_100_T4:
+ case IFM_100_VG:
+ case IFM_100_T2:
+ case IFM_100_T:
+ key = IFM_100_TX;
+ break;
+ case IFM_1000_SX:
+ case IFM_1000_LX:
+ case IFM_1000_CX:
+ case IFM_1000_T:
+ case IFM_1000_KX:
+ case IFM_1000_SGMII:
+ case IFM_1000_CX_SGMII:
+ key = IFM_1000_SX;
+ break;
+ case IFM_10G_LR:
+ case IFM_10G_SR:
+ case IFM_10G_CX4:
+ case IFM_10G_TWINAX:
+ case IFM_10G_TWINAX_LONG:
+ case IFM_10G_LRM:
+ case IFM_10G_T:
+ case IFM_10G_KX4:
+ case IFM_10G_KR:
+ case IFM_10G_CR1:
+ case IFM_10G_ER:
+ case IFM_10G_SFI:
+ key = IFM_10G_LR;
+ break;
+ case IFM_20G_KR2:
+ key = IFM_20G_KR2;
+ break;
+ case IFM_2500_KX:
+ case IFM_2500_T:
+ key = IFM_2500_KX;
+ break;
+ case IFM_5000_T:
+ key = IFM_5000_T;
+ break;
+ case IFM_50G_PCIE:
+ case IFM_50G_CR2:
+ case IFM_50G_KR2:
+ key = IFM_50G_PCIE;
+ break;
+ case IFM_56G_R4:
+ key = IFM_56G_R4;
+ break;
+ case IFM_25G_PCIE:
+ case IFM_25G_CR:
+ case IFM_25G_KR:
+ case IFM_25G_SR:
+ key = IFM_25G_PCIE;
+ break;
+ case IFM_40G_CR4:
+ case IFM_40G_SR4:
+ case IFM_40G_LR4:
+ case IFM_40G_XLPPI:
+ case IFM_40G_KR4:
+ key = IFM_40G_CR4;
+ break;
+ case IFM_100G_CR4:
+ case IFM_100G_SR4:
+ case IFM_100G_KR4:
+ case IFM_100G_LR4:
+ key = IFM_100G_CR4;
+ break;
+ default:
+ key = subtype;
+ break;
+ }
/* bit 5..14: (some bits of) if_index of lagg device */
key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
/* bit 15: 0 */
@@ -1265,6 +1369,8 @@
static void
lacp_sm_mux(struct lacp_port *lp)
{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct lagg_softc *sc = lgp->lp_softc;
enum lacp_mux_state new_state;
boolean_t p_sync =
(lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
@@ -1273,8 +1379,10 @@
enum lacp_selected selected = lp->lp_selected;
struct lacp_aggregator *la;
- if (lacp_debug > 1)
- lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state);
+ if (V_lacp_debug > 1)
+ lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, "
+ "p_sync= 0x%x, p_collecting= 0x%x\n", __func__,
+ lp->lp_mux_state, selected, p_sync, p_collecting);
re_eval:
la = lp->lp_aggregator;
@@ -1314,6 +1422,8 @@
case LACP_MUX_DISTRIBUTING:
if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
new_state = LACP_MUX_COLLECTING;
+ lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n");
+ sc->sc_flapping++;
}
break;
default:
@@ -1562,6 +1672,10 @@
sizeof(buf))));
}
+ /* XXX Hack, still need to implement 5.4.9 para 2,3,4 */
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner.lip_state |= LACP_STATE_SYNC;
+
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1587,7 +1701,10 @@
LACP_TRACE(lp);
oldpstate = lp->lp_partner.lip_state;
- lp->lp_partner = lacp_partner_admin;
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner = lacp_partner_admin_strict;
+ else
+ lp->lp_partner = lacp_partner_admin_optimistic;
lp->lp_state |= LACP_STATE_DEFAULTED;
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1622,7 +1739,12 @@
LACP_TRACE(lp);
- lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+ if (lp->lp_lsc->lsc_strict_mode)
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_strict);
+ else
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_optimistic);
}
/* transmit machine */
@@ -1630,7 +1752,7 @@
static void
lacp_sm_tx(struct lacp_port *lp)
{
- int error;
+ int error = 0;
if (!(lp->lp_state & LACP_STATE_AGGREGATION)
#if 1
@@ -1652,7 +1774,11 @@
return;
}
- error = lacp_xmit_lacpdu(lp);
+ if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) {
+ error = lacp_xmit_lacpdu(lp);
+ } else {
+ LACP_TPRINTF((lp, "Dropping TX PDU\n"));
+ }
if (error == 0) {
lp->lp_flags &= ~LACP_PORT_NTT;
Modified: trunk/sys/net/ieee8023ad_lacp.h
===================================================================
--- trunk/sys/net/ieee8023ad_lacp.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ieee8023ad_lacp.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/ieee8023ad_lacp.h 177289 2008-03-17 01:26:44Z thompsa $
+ * $FreeBSD: stable/10/sys/net/ieee8023ad_lacp.h 287808 2015-09-15 05:19:10Z hiren $
*/
/*
@@ -76,6 +76,7 @@
"\007DEFAULTED" \
"\010EXPIRED"
+#ifdef _KERNEL
/*
* IEEE802.3 slow protocols
*
@@ -246,6 +247,12 @@
struct lacp_portmap lsc_pmap[2];
volatile u_int lsc_activemap;
u_int32_t lsc_hashkey;
+ struct {
+ u_int32_t lsc_rx_test;
+ u_int32_t lsc_tx_test;
+ } lsc_debug;
+ u_int32_t lsc_strict_mode;
+ boolean_t lsc_fast_timeout; /* if set, fast timeout */
};
#define LACP_TYPE_ACTORINFO 1
@@ -278,8 +285,8 @@
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-int lacp_attach(struct lagg_softc *);
-int lacp_detach(struct lagg_softc *);
+void lacp_attach(struct lagg_softc *);
+int lacp_detach(void *);
void lacp_init(struct lagg_softc *);
void lacp_stop(struct lagg_softc *);
int lacp_port_create(struct lagg_port *);
@@ -332,3 +339,4 @@
#define LACP_LAGIDSTR_MAX \
(1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
#define LACP_STATESTR_MAX (255) /* XXX */
+#endif /* _KERNEL */
Modified: trunk/sys/net/if.c
===================================================================
--- trunk/sys/net/if.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if.c 8.5 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/if.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if.c 333106 2018-04-30 08:39:23Z royger $
*/
#include "opt_compat.h"
@@ -75,18 +75,19 @@
#include <net/vnet.h>
#if defined(INET) || defined(INET6)
-/*XXX*/
+#include <net/ethernet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet/ip.h>
#include <netinet/ip_carp.h>
+#ifdef INET
+#include <netinet/if_ether.h>
+#endif /* INET */
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
-#endif
-#endif
-#ifdef INET
-#include <netinet/if_ether.h>
-#endif
+#endif /* INET6 */
+#endif /* INET || INET6 */
#include <security/mac/mac_framework.h>
@@ -113,6 +114,14 @@
&log_link_state_change, 0,
"log interface link state change events");
+/* Log promiscuous mode change events */
+static int log_promisc_mode_change = 1;
+
+TUNABLE_INT("net.link.log_promisc_mode_change", &log_promisc_mode_change);
+SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
+ &log_promisc_mode_change, 1,
+ "log promiscuous mode change events");
+
/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
@@ -130,18 +139,22 @@
void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void (*carp_linkstate_p)(struct ifnet *ifp);
+void (*carp_demote_adj_p)(int, char *);
+int (*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
-struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
+int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *sa, struct rtentry *rt);
+ const struct sockaddr *sa);
+int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+int (*carp_attach_p)(struct ifaddr *, int);
+void (*carp_detach_p)(struct ifaddr *);
#endif
#ifdef INET
-int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *,
- u_int8_t **);
+int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
-caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
+caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
const struct in6_addr *taddr);
#endif
@@ -158,6 +171,7 @@
static void if_freemulti(struct ifmultiaddr *);
static void if_init(void *);
static void if_grow(void);
+static void if_input_default(struct ifnet *, struct mbuf *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
@@ -170,8 +184,8 @@
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
static int if_getgroupmembers(struct ifgroupreq *);
static void if_delgroups(struct ifnet *);
-static void if_attach_internal(struct ifnet *, int);
-static void if_detach_internal(struct ifnet *, int);
+static void if_attach_internal(struct ifnet *, int, struct if_clone *);
+static int if_detach_internal(struct ifnet *, int, struct if_clone **);
#ifdef INET6
/*
@@ -334,11 +348,12 @@
struct ifaddr *
ifaddr_byindex(u_short idx)
{
- struct ifaddr *ifa;
+ struct ifnet *ifp;
+ struct ifaddr *ifa = NULL;
IFNET_RLOCK_NOSLEEP();
- ifa = ifnet_byindex_locked(idx)->if_addr;
- if (ifa != NULL)
+ ifp = ifnet_byindex_locked(idx);
+ if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
ifa_ref(ifa);
IFNET_RUNLOCK_NOSLEEP();
return (ifa);
@@ -454,7 +469,6 @@
ifp->if_afdata_initialized = 0;
IF_AFDATA_LOCK_INIT(ifp);
TAILQ_INIT(&ifp->if_addrhead);
- TAILQ_INIT(&ifp->if_prefixhead);
TAILQ_INIT(&ifp->if_multiaddrs);
TAILQ_INIT(&ifp->if_groups);
#ifdef MAC
@@ -495,21 +509,15 @@
}
/*
- * This version should only be called by intefaces that switch their type
- * after calling if_alloc(). if_free_type() will go away again now that we
- * have if_alloctype to cache the original allocation type. For now, assert
- * that they match, since we require that in practice.
+ * Deregister an interface and free the associated storage.
*/
void
-if_free_type(struct ifnet *ifp, u_char type)
+if_free(struct ifnet *ifp)
{
- KASSERT(ifp->if_alloctype == type,
- ("if_free_type: type (%d) != alloctype (%d)", type,
- ifp->if_alloctype));
-
ifp->if_flags |= IFF_DYING; /* XXX: Locking */
+ CURVNET_SET_QUIET(ifp->if_vnet);
IFNET_WLOCK();
KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
("%s: freeing unallocated ifnet", ifp->if_xname));
@@ -517,24 +525,12 @@
ifindex_free_locked(ifp->if_index);
IFNET_WUNLOCK();
- if (!refcount_release(&ifp->if_refcount))
- return;
- if_free_internal(ifp);
+ if (refcount_release(&ifp->if_refcount))
+ if_free_internal(ifp);
+ CURVNET_RESTORE();
}
/*
- * This is the normal version of if_free(), used by device drivers to free a
- * detached network interface. The contents of if_free_type() will move into
- * here when if_free_type() goes away.
- */
-void
-if_free(struct ifnet *ifp)
-{
-
- if_free_type(ifp, ifp->if_alloctype);
-}
-
-/*
* Interfaces to keep an ifnet type-stable despite the possibility of the
* driver calling if_free(). If there are additional references, we defer
* freeing the underlying data structure.
@@ -585,6 +581,15 @@
* tasks, given that we are moving from one vnet to another an ifnet which
* has already been fully initialized.
*
+ * Note that if_detach_internal() removes group membership unconditionally
+ * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
+ * Thus, when if_vmove() is applied to a cloned interface, group membership
+ * is lost while a cloned one always joins a group whose name is
+ * ifc->ifc_name. To recover this after if_detach_internal() and
+ * if_attach_internal(), the cloner should be specified to
+ * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal()
+ * attempts to join a group whose name is ifc->ifc_name.
+ *
* XXX:
* - The decision to return void and thus require this function to
* succeed is questionable.
@@ -595,11 +600,62 @@
if_attach(struct ifnet *ifp)
{
- if_attach_internal(ifp, 0);
+ if_attach_internal(ifp, 0, NULL);
}
+/*
+ * Compute the least common TSO limit.
+ */
+void
+if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+{
+ /*
+ * 1) If there is no limit currently, take the limit from
+ * the network adapter.
+ *
+ * 2) If the network adapter has a limit below the current
+ * limit, apply it.
+ */
+ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
+ ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
+ pmax->tsomaxbytes = ifp->if_hw_tsomax;
+ }
+ if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
+ ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
+ pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+ }
+ if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
+ ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
+ pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
+ }
+}
+
+/*
+ * Update TSO limit of a network adapter.
+ *
+ * Returns zero if no change. Else non-zero.
+ */
+int
+if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+{
+ int retval = 0;
+ if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
+ ifp->if_hw_tsomax = pmax->tsomaxbytes;
+ retval++;
+ }
+ if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
+ ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
+ retval++;
+ }
+ if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
+ ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
+ retval++;
+ }
+ return (retval);
+}
+
static void
-if_attach_internal(struct ifnet *ifp, int vmove)
+if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
{
unsigned socksize, ifasize;
int namelen, masklen;
@@ -618,6 +674,10 @@
if_addgroup(ifp, IFG_ALL);
+ /* Restore group membership for cloned interfaces. */
+ if (vmove && ifc != NULL)
+ if_clone_addgroup(ifp, ifc);
+
getmicrotime(&ifp->if_lastchange);
ifp->if_data.ifi_epoch = time_uptime;
ifp->if_data.ifi_datalen = sizeof(struct if_data);
@@ -629,7 +689,9 @@
ifp->if_transmit = if_transmit;
ifp->if_qflush = if_qflush;
}
-
+ if (ifp->if_input == NULL)
+ ifp->if_input = if_input_default;
+
if (!vmove) {
#ifdef MAC
mac_ifnet_create(ifp);
@@ -670,6 +732,37 @@
TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
/* Reliably crash if used uninitialized. */
ifp->if_broadcastaddr = NULL;
+
+ if (ifp->if_type == IFT_ETHER) {
+ ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
+ M_WAITOK | M_ZERO);
+ }
+
+#if defined(INET) || defined(INET6)
+ /* Use defaults for TSO, if nothing is set */
+ if (ifp->if_hw_tsomax == 0 &&
+ ifp->if_hw_tsomaxsegcount == 0 &&
+ ifp->if_hw_tsomaxsegsize == 0) {
+ /*
+ * The TSO defaults needs to be such that an
+ * NFS mbuf list of 35 mbufs totalling just
+ * below 64K works and that a chain of mbufs
+ * can be defragged into at most 32 segments:
+ */
+ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
+ (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
+ ifp->if_hw_tsomaxsegcount = 35;
+ ifp->if_hw_tsomaxsegsize = 2048; /* 2K */
+
+ /* XXX some drivers set IFCAP_TSO after ethernet attach */
+ if (ifp->if_capabilities & IFCAP_TSO) {
+ if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
+ ifp->if_hw_tsomax,
+ ifp->if_hw_tsomaxsegcount,
+ ifp->if_hw_tsomaxsegsize);
+ }
+ }
+#endif
}
#ifdef VIMAGE
else {
@@ -709,12 +802,9 @@
if_attachdomain(void *dummy)
{
struct ifnet *ifp;
- int s;
- s = splnet();
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
if_attachdomain1(ifp);
- splx(s);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
if_attachdomain, NULL);
@@ -723,23 +813,17 @@
if_attachdomain1(struct ifnet *ifp)
{
struct domain *dp;
- int s;
- s = splnet();
-
/*
* Since dp->dom_ifattach calls malloc() with M_WAITOK, we
* cannot lock ifp->if_afdata initialization, entirely.
*/
- if (IF_AFDATA_TRYLOCK(ifp) == 0) {
- splx(s);
+ if (IF_AFDATA_TRYLOCK(ifp) == 0)
return;
- }
if (ifp->if_afdata_initialized >= domain_init_status) {
IF_AFDATA_UNLOCK(ifp);
- splx(s);
- printf("if_attachdomain called more than once on %s\n",
- ifp->if_xname);
+ log(LOG_WARNING, "%s called more than once on %s\n",
+ __func__, ifp->if_xname);
return;
}
ifp->if_afdata_initialized = domain_init_status;
@@ -752,8 +836,6 @@
ifp->if_afdata[dp->dom_family] =
(*dp->dom_ifattach)(ifp);
}
-
- splx(s);
}
/*
@@ -822,11 +904,13 @@
if_detach(struct ifnet *ifp)
{
- if_detach_internal(ifp, 0);
+ CURVNET_SET_QUIET(ifp->if_vnet);
+ if_detach_internal(ifp, 0, NULL);
+ CURVNET_RESTORE();
}
-static void
-if_detach_internal(struct ifnet *ifp, int vmove)
+static int
+if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
{
struct ifaddr *ifa;
struct radix_node_head *rnh;
@@ -848,13 +932,25 @@
#endif
IFNET_WUNLOCK();
if (!found) {
+ /*
+ * While we would want to panic here, we cannot
+ * guarantee that the interface is indeed still on
+ * the list given we don't hold locks all the way.
+ */
+ return (ENOENT);
+#if 0
if (vmove)
panic("%s: ifp=%p not on the ifnet tailq %p",
__func__, ifp, &V_ifnet);
else
return; /* XXX this should panic as well? */
+#endif
}
+ /* Check if this is a cloned interface or not. */
+ if (vmove && ifcp != NULL)
+ *ifcp = if_clone_findifc(ifp);
+
/*
* Remove/wait for pending events.
*/
@@ -888,6 +984,12 @@
#endif
if_purgemaddrs(ifp);
+ /* Announce that the interface is gone. */
+ rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
+ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
+
if (!vmove) {
/*
* Prevent further calls into the device driver via ifnet.
@@ -898,6 +1000,8 @@
* Remove link ifaddr pointer and maybe decrement if_index.
* Clean up all addresses.
*/
+ free(ifp->if_hw_addr, M_IFADDR);
+ ifp->if_hw_addr = NULL;
ifp->if_addr = NULL;
/* We can now free link ifaddr. */
@@ -925,11 +1029,6 @@
}
}
- /* Announce that the interface is gone. */
- rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
- EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
- if (IS_DEFAULT_VNET(curvnet))
- devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
if_delgroups(ifp);
/*
@@ -946,6 +1045,8 @@
(*dp->dom_ifdetach)(ifp,
ifp->if_afdata[dp->dom_family]);
}
+
+ return (0);
}
#ifdef VIMAGE
@@ -960,12 +1061,17 @@
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
u_short idx;
+ struct if_clone *ifc;
+ int rc;
/*
* Detach from current vnet, but preserve LLADDR info, do not
* mark as dead etc. so that the ifnet can be reattached later.
+ * If we cannot find it, we lost the race to someone else.
*/
- if_detach_internal(ifp, 1);
+ rc = if_detach_internal(ifp, 1, &ifc);
+ if (rc != 0)
+ return;
/*
* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
@@ -999,7 +1105,7 @@
ifnet_setbyindex_locked(ifp->if_index, ifp);
IFNET_WUNLOCK();
- if_attach_internal(ifp, 1);
+ if_attach_internal(ifp, 1, ifc);
CURVNET_RESTORE();
}
@@ -1102,6 +1208,7 @@
struct ifg_list *ifgl;
struct ifg_group *ifg = NULL;
struct ifg_member *ifgm;
+ int new = 0;
if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
groupname[strlen(groupname) - 1] <= '9')
@@ -1142,8 +1249,8 @@
strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
ifg->ifg_refcnt = 0;
TAILQ_INIT(&ifg->ifg_members);
- EVENTHANDLER_INVOKE(group_attach_event, ifg);
TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
+ new = 1;
}
ifg->ifg_refcnt++;
@@ -1157,6 +1264,8 @@
IFNET_WUNLOCK();
+ if (new)
+ EVENTHANDLER_INVOKE(group_attach_event, ifg);
EVENTHANDLER_INVOKE(group_change_event, groupname);
return (0);
@@ -1195,10 +1304,11 @@
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ IFNET_WUNLOCK();
EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
free(ifgl->ifgl_group, M_TEMP);
- }
- IFNET_WUNLOCK();
+ } else
+ IFNET_WUNLOCK();
free(ifgl, M_TEMP);
@@ -1239,11 +1349,12 @@
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ IFNET_WUNLOCK();
EVENTHANDLER_INVOKE(group_detach_event,
ifgl->ifgl_group);
free(ifgl->ifgl_group, M_TEMP);
- }
- IFNET_WUNLOCK();
+ } else
+ IFNET_WUNLOCK();
free(ifgl, M_TEMP);
@@ -1392,6 +1503,100 @@
}
/*
+ * A compatibility function returns ifnet counter values.
+ */
+uint64_t
+if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
+{
+
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+ switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ return (ifp->if_ipackets);
+ case IFCOUNTER_IERRORS:
+ return (ifp->if_ierrors);
+ case IFCOUNTER_OPACKETS:
+ return (ifp->if_opackets);
+ case IFCOUNTER_OERRORS:
+ return (ifp->if_oerrors);
+ case IFCOUNTER_COLLISIONS:
+ return (ifp->if_collisions);
+ case IFCOUNTER_IBYTES:
+ return (ifp->if_ibytes);
+ case IFCOUNTER_OBYTES:
+ return (ifp->if_obytes);
+ case IFCOUNTER_IMCASTS:
+ return (ifp->if_imcasts);
+ case IFCOUNTER_OMCASTS:
+ return (ifp->if_omcasts);
+ case IFCOUNTER_IQDROPS:
+ return (ifp->if_iqdrops);
+#ifdef _IFI_OQDROPS
+ case IFCOUNTER_OQDROPS:
+ return (ifp->if_oqdrops);
+#endif
+ case IFCOUNTER_NOPROTO:
+ return (ifp->if_noproto);
+ default:
+ break;
+ };
+ return (0);
+}
+
+/*
+ * Increase an ifnet counter. Usually used for counters shared
+ * between the stack and a driver, but function supports them all.
+ */
+void
+if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
+{
+
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+ switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ ifp->if_ipackets += inc;
+ break;
+ case IFCOUNTER_IERRORS:
+ ifp->if_ierrors += inc;
+ break;
+ case IFCOUNTER_OPACKETS:
+ ifp->if_opackets += inc;
+ break;
+ case IFCOUNTER_OERRORS:
+ ifp->if_oerrors += inc;
+ break;
+ case IFCOUNTER_COLLISIONS:
+ ifp->if_collisions += inc;
+ break;
+ case IFCOUNTER_IBYTES:
+ ifp->if_ibytes += inc;
+ break;
+ case IFCOUNTER_OBYTES:
+ ifp->if_obytes += inc;
+ break;
+ case IFCOUNTER_IMCASTS:
+ ifp->if_imcasts += inc;
+ break;
+ case IFCOUNTER_OMCASTS:
+ ifp->if_omcasts += inc;
+ break;
+ case IFCOUNTER_IQDROPS:
+ ifp->if_iqdrops += inc;
+ break;
+#ifdef _IFI_OQDROPS
+ case IFCOUNTER_OQDROPS:
+ ifp->if_oqdrops += inc;
+ break;
+#endif
+ case IFCOUNTER_NOPROTO:
+ ifp->if_noproto += inc;
+ break;
+ default:
+ break;
+ };
+}
+
+/*
* Wrapper functions for struct ifnet address list locking macros. These are
* used by kernel modules to avoid encoding programming interface or binary
* interface assumptions that may be violated when kernel-internal locking
@@ -1426,7 +1631,7 @@
}
/*
- * Reference count functions for ifaddrs.
+ * Initialization, destruction and refcounting functions for ifaddrs.
*/
void
ifa_init(struct ifaddr *ifa)
@@ -1434,6 +1639,7 @@
mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF);
refcount_init(&ifa->ifa_refcnt, 1);
+ ifa->if_data.ifi_datalen = sizeof(ifa->if_data);
}
void
@@ -1463,10 +1669,10 @@
bzero(&info, sizeof(info));
info.rti_ifp = V_loif;
- info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
+ info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
info.rti_info[RTAX_DST] = ia;
info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
- error = rtrequest1_fib(RTM_ADD, &info, &rt, 0);
+ error = rtrequest1_fib(RTM_ADD, &info, &rt, ifa->ifa_ifp->if_fib);
if (error == 0 && rt != NULL) {
RT_LOCK(rt);
@@ -1477,7 +1683,7 @@
RT_REMREF(rt);
RT_UNLOCK(rt);
} else if (error != 0)
- log(LOG_INFO, "ifa_add_loopback_route: insertion failed\n");
+ log(LOG_DEBUG, "%s: insertion failed: %u\n", __func__, error);
return (error);
}
@@ -1495,13 +1701,13 @@
null_sdl.sdl_type = ifa->ifa_ifp->if_type;
null_sdl.sdl_index = ifa->ifa_ifp->if_index;
bzero(&info, sizeof(info));
- info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
+ info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
info.rti_info[RTAX_DST] = ia;
info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
- error = rtrequest1_fib(RTM_DELETE, &info, NULL, 0);
+ error = rtrequest1_fib(RTM_DELETE, &info, NULL, ifa->ifa_ifp->if_fib);
if (error != 0)
- log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+ log(LOG_DEBUG, "%s: deletion failed: %u\n", __func__, error);
return (error);
}
@@ -1615,7 +1821,7 @@
*/
/*ARGSUSED*/
struct ifaddr *
-ifa_ifwithdstaddr(struct sockaddr *addr)
+ifa_ifwithdstaddr_fib(struct sockaddr *addr, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1624,6 +1830,8 @@
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
continue;
+ if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
+ continue;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
@@ -1643,12 +1851,19 @@
return (ifa);
}
+struct ifaddr *
+ifa_ifwithdstaddr(struct sockaddr *addr)
+{
+
+ return (ifa_ifwithdstaddr_fib(addr, RT_ALL_FIBS));
+}
+
/*
* Find an interface on a specific network. If many, choice
* is most specific found.
*/
struct ifaddr *
-ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
+ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1668,12 +1883,14 @@
/*
* Scan though each interface, looking for ones that have addresses
- * in this address family. Maintain a reference on ifa_maybe once
- * we find one, as we release the IF_ADDR_RLOCK() that kept it stable
- * when we move onto the next interface.
+ * in this address family and the requested fib. Maintain a reference
+ * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
+ * kept it stable when we move onto the next interface.
*/
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
+ continue;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
char *cp, *cp2, *cp3;
@@ -1730,11 +1947,13 @@
/*
* If the netmask of what we just found
* is more specific than what we had before
- * (if we had one) then remember the new one
- * before continuing to search
- * for an even better one.
+ * (if we had one), or if the virtual status
+ * of new prefix is better than of the old one,
+ * then remember the new one before continuing
+ * to search for an even better one.
*/
if (ifa_maybe == NULL ||
+ ifa_preferred(ifa_maybe, ifa) ||
rn_refines((caddr_t)ifa->ifa_netmask,
(caddr_t)ifa_maybe->ifa_netmask)) {
if (ifa_maybe != NULL)
@@ -1755,6 +1974,13 @@
return (ifa);
}
+struct ifaddr *
+ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
+{
+
+ return (ifa_ifwithnet_fib(addr, ignore_ptp, RT_ALL_FIBS));
+}
+
/*
* Find an interface address specific to an interface best matching
* a given address.
@@ -1806,6 +2032,21 @@
return (ifa);
}
+/*
+ * See whether new ifa is better than current one:
+ * 1) A non-virtual one is preferred over virtual.
+ * 2) A virtual in master state preferred over any other state.
+ *
+ * Used in several address selecting functions.
+ */
+int
+ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
+{
+
+ return (cur->ifa_carp && (!next->ifa_carp ||
+ ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
+}
+
#include <net/if_llatbl.h>
/*
@@ -1838,7 +2079,6 @@
/*
* Mark an interface down and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
static void
if_unroute(struct ifnet *ifp, int flag, int fam)
@@ -1862,7 +2102,6 @@
/*
* Mark an interface up and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
static void
if_route(struct ifnet *ifp, int flag, int fam)
@@ -1941,7 +2180,7 @@
if (log_link_state_change)
log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
(link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
- EVENTHANDLER_INVOKE(ifnet_link_event, ifp, ifp->if_link_state);
+ EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
CURVNET_RESTORE();
}
@@ -1948,12 +2187,12 @@
/*
* Mark an interface down and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
void
if_down(struct ifnet *ifp)
{
+ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
if_unroute(ifp, IFF_UP, AF_UNSPEC);
}
@@ -1960,7 +2199,6 @@
/*
* Mark an interface up and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
void
if_up(struct ifnet *ifp)
@@ -1967,6 +2205,7 @@
{
if_route(ifp, IFF_UP, AF_UNSPEC);
+ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}
/*
@@ -1986,7 +2225,7 @@
#endif
n = ifq->ifq_head;
while ((m = n) != 0) {
- n = m->m_act;
+ n = m->m_nextpkt;
m_freem(m);
}
ifq->ifq_head = 0;
@@ -2038,7 +2277,7 @@
{
struct ifreq *ifr;
struct ifstat *ifs;
- int error = 0;
+ int error = 0, do_ifup = 0;
int new_flags, temp_flags;
size_t namelen, onamelen;
size_t descrlen;
@@ -2163,14 +2402,10 @@
/* Smart drivers twiddle their own routes */
} else if (ifp->if_flags & IFF_UP &&
(new_flags & IFF_UP) == 0) {
- int s = splimp();
if_down(ifp);
- splx(s);
} else if (new_flags & IFF_UP &&
(ifp->if_flags & IFF_UP) == 0) {
- int s = splimp();
- if_up(ifp);
- splx(s);
+ do_ifup = 1;
}
/* See if permanently promiscuous mode bit is about to flip */
if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
@@ -2178,9 +2413,11 @@
ifp->if_flags |= IFF_PROMISC;
else if (ifp->if_pcount == 0)
ifp->if_flags &= ~IFF_PROMISC;
- log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
- ifp->if_xname,
- (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
+ if (log_promisc_mode_change)
+ log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
+ ifp->if_xname,
+ ((new_flags & IFF_PPROMISC) ?
+ "enabled" : "disabled"));
}
ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
(new_flags &~ IFF_CANTCHANGE);
@@ -2187,6 +2424,8 @@
if (ifp->if_ioctl) {
(void) (*ifp->if_ioctl)(ifp, cmd, data);
}
+ if (do_ifup)
+ if_up(ifp);
getmicrotime(&ifp->if_lastchange);
break;
@@ -2218,6 +2457,11 @@
return (error);
if (new_name[0] == '\0')
return (EINVAL);
+ if (new_name[IFNAMSIZ-1] != '\0') {
+ new_name[IFNAMSIZ-1] = '\0';
+ if (strlen(new_name) == IFNAMSIZ-1)
+ return (EINVAL);
+ }
if (ifunit(new_name) != NULL)
return (EEXIST);
@@ -2388,7 +2632,10 @@
case SIOCGIFPDSTADDR:
case SIOCGLIFPHYADDR:
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
case SIOCGIFGENERIC:
+ case SIOCGIFRSSKEY:
+ case SIOCGIFRSSHASH:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
error = (*ifp->if_ioctl)(ifp, cmd, data);
@@ -2403,6 +2650,10 @@
EVENTHANDLER_INVOKE(iflladdr_event, ifp);
break;
+ case SIOCGHWADDR:
+ error = if_gethwaddr(ifp, ifr);
+ break;
+
case SIOCAIFGROUP:
{
struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
@@ -2523,6 +2774,16 @@
error = if_getgroupmembers((struct ifgroupreq *)data);
CURVNET_RESTORE();
return (error);
+#if defined(INET) || defined(INET6)
+ case SIOCSVH:
+ case SIOCGVH:
+ if (carp_ioctl_p == NULL)
+ error = EPROTONOSUPPORT;
+ else
+ error = (*carp_ioctl_p)(ifr, cmd, td);
+ CURVNET_RESTORE();
+ return (error);
+#endif
}
ifp = ifunit_ref(ifr->ifr_name);
@@ -2544,11 +2805,23 @@
CURVNET_RESTORE();
return (EOPNOTSUPP);
}
+
+ /*
+ * Pass the request on to the socket control method, and if the
+ * latter returns EOPNOTSUPP, directly to the interface.
+ *
+ * Make an exception for the legacy SIOCSIF* requests. Drivers
+ * trust SIOCSIFADDR et al to come from an already privileged
+ * layer, and do not perform any credentials checks or input
+ * validation.
+ */
#ifndef COMPAT_43
error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
data,
ifp, td));
- if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
+ if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
+ cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+ cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
error = (*ifp->if_ioctl)(ifp, cmd, data);
#else
{
@@ -2592,7 +2865,9 @@
data,
ifp, td));
if (error == EOPNOTSUPP && ifp != NULL &&
- ifp->if_ioctl != NULL)
+ ifp->if_ioctl != NULL &&
+ cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
+ cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
error = (*ifp->if_ioctl)(ifp, cmd, data);
switch (ocmd) {
@@ -2608,11 +2883,8 @@
if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
- if (ifp->if_flags & IFF_UP) {
- int s = splimp();
+ if (ifp->if_flags & IFF_UP)
in6_if_up(ifp);
- splx(s);
- }
#endif
}
if_rele(ifp);
@@ -2708,7 +2980,8 @@
error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
&ifp->if_pcount, pswitch);
/* If promiscuous mode status has changed, log a message */
- if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
+ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
+ log_promisc_mode_change)
log(LOG_INFO, "%s: promiscuous mode %s\n",
ifp->if_xname,
(ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
@@ -2753,10 +3026,10 @@
int addrs;
/*
- * Zero the ifr_name buffer to make sure we don't
- * disclose the contents of the stack.
+ * Zero the ifr to make sure we don't disclose the contents
+ * of the stack.
*/
- memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
+ memset(&ifr, 0, sizeof(ifr));
if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
>= sizeof(ifr.ifr_name)) {
@@ -2784,7 +3057,13 @@
} else
#endif
if (sa->sa_len <= sizeof(*sa)) {
- ifr.ifr_addr = *sa;
+ if (sa->sa_len < sizeof(*sa)) {
+ memset(&ifr.ifr_ifru.ifru_addr, 0,
+ sizeof(ifr.ifr_ifru.ifru_addr));
+ memcpy(&ifr.ifr_ifru.ifru_addr, sa,
+ sa->sa_len);
+ } else
+ ifr.ifr_ifru.ifru_addr = *sa;
sbuf_bcat(sb, &ifr, sizeof(ifr));
max_len += sizeof(ifr);
} else {
@@ -2800,7 +3079,6 @@
}
IF_ADDR_RUNLOCK(ifp);
if (addrs == 0) {
- bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
sbuf_bcat(sb, &ifr, sizeof(ifr));
max_len += sizeof(ifr);
@@ -3320,6 +3598,29 @@
}
/*
+ * Get the link layer address that was read from the hardware at attach.
+ *
+ * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
+ * their component interfaces as IFT_IEEE8023ADLAG.
+ */
+int
+if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
+{
+
+ if (ifp->if_hw_addr == NULL)
+ return (ENODEV);
+
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_IEEE8023ADLAG:
+ bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
+ return (0);
+ default:
+ return (ENODEV);
+ }
+}
+
+/*
* The name argument must be a pointer to storage which will last as
* long as the interface does. For physical devices, the result of
* device_get_name(dev) is a good choice and for pseudo-devices a
@@ -3369,6 +3670,13 @@
return (error);
}
+static void
+if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
+{
+
+ m_freem(m);
+}
+
int
if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
{
Modified: trunk/sys/net/if.h
===================================================================
--- trunk/sys/net/if.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if.h 238247 2012-07-08 14:21:36Z bz $
+ * $FreeBSD: stable/10/sys/net/if.h 324462 2017-10-10 02:35:04Z sephe $
*/
#ifndef _NET_IF_H_
@@ -86,8 +86,8 @@
u_char ifi_addrlen; /* media address length */
u_char ifi_hdrlen; /* media header length */
u_char ifi_link_state; /* current link state */
- u_char ifi_spare_char1; /* spare byte */
- u_char ifi_spare_char2; /* spare byte */
+ u_char ifi_vhid; /* carp vhid */
+ u_char ifi_baudrate_pf; /* baudrate power factor */
u_char ifi_datalen; /* length of this data struct */
u_long ifi_mtu; /* maximum transmission unit */
u_long ifi_metric; /* routing metric (external only) */
@@ -104,9 +104,12 @@
u_long ifi_omcasts; /* packets sent via multicast */
u_long ifi_iqdrops; /* dropped on input, this interface */
u_long ifi_noproto; /* destined for unsupported protocol */
- u_long ifi_hwassist; /* HW offload capabilities, see IFCAP */
+ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */
time_t ifi_epoch; /* uptime at attach or stat reset */
struct timeval ifi_lastchange; /* time of last administrative change */
+#ifdef _IFI_OQDROPS
+ u_long ifi_oqdrops; /* dropped on output */
+#endif
};
/*-
@@ -154,7 +157,6 @@
#define IFF_STATICARP 0x80000 /* (n) static ARP */
#define IFF_DYING 0x200000 /* (n) interface is winding down */
#define IFF_RENAMING 0x400000 /* (n) interface is being renamed */
-
/*
* Old names for driver flags so that user space tools can continue to use
* the old (portable) names.
@@ -181,7 +183,7 @@
* Some convenience macros used for setting ifi_baudrate.
* XXX 1000 vs. 1024? --thorpej at netbsd.org
*/
-#define IF_Kbps(x) ((x) * 1000) /* kilobits/sec. */
+#define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */
#define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */
#define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */
@@ -233,6 +235,7 @@
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */
#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */
+#define IFCAP_HWSTATS 0x800000 /* manages counters internally */
#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
@@ -284,6 +287,9 @@
u_short ifm_len; /* length of if_msghdrl incl. if_data */
u_short ifm_data_off; /* offset of if_data from beginning */
struct if_data ifm_data;/* statistics and other data about if */
+#ifdef _IN_NET_RTSOCK_C
+ u_long ifi_oqdrops;
+#endif
};
/*
@@ -416,8 +422,17 @@
struct sockaddr ifra_addr;
struct sockaddr ifra_broadaddr;
struct sockaddr ifra_mask;
+ int ifra_vhid;
};
+/* Compat with pre-10.x */
+struct oifaliasreq {
+ char ifra_name[IFNAMSIZ];
+ struct sockaddr ifra_addr;
+ struct sockaddr ifra_broadaddr;
+ struct sockaddr ifra_mask;
+};
+
struct ifmediareq {
char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */
int ifm_current; /* current media options */
@@ -507,6 +522,55 @@
struct sockaddr_storage dstaddr; /* out */
};
+/*
+ * Structure used to request i2c data
+ * from interface transceivers.
+ */
+struct ifi2creq {
+ uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */
+ uint8_t offset; /* read offset */
+ uint8_t len; /* read length */
+ uint8_t spare0;
+ uint32_t spare1;
+ uint8_t data[8]; /* read buffer */
+};
+
+/*
+ * RSS hash.
+ */
+
+#define RSS_FUNC_NONE 0 /* RSS disabled */
+#define RSS_FUNC_PRIVATE 1 /* non-standard */
+#define RSS_FUNC_TOEPLITZ 2
+
+#define RSS_TYPE_IPV4 0x00000001
+#define RSS_TYPE_TCP_IPV4 0x00000002
+#define RSS_TYPE_IPV6 0x00000004
+#define RSS_TYPE_IPV6_EX 0x00000008
+#define RSS_TYPE_TCP_IPV6 0x00000010
+#define RSS_TYPE_TCP_IPV6_EX 0x00000020
+#define RSS_TYPE_UDP_IPV4 0x00000040
+#define RSS_TYPE_UDP_IPV6 0x00000080
+#define RSS_TYPE_UDP_IPV6_EX 0x00000100
+
+#define RSS_KEYLEN 128
+
+struct ifrsskey {
+ char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ uint8_t ifrk_func; /* RSS_FUNC_ */
+ uint8_t ifrk_spare0;
+ uint16_t ifrk_keylen;
+ uint8_t ifrk_key[RSS_KEYLEN];
+};
+
+struct ifrsshash {
+ char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ uint8_t ifrh_func; /* RSS_FUNC_ */
+ uint8_t ifrh_spare0;
+ uint16_t ifrh_spare1;
+ uint32_t ifrh_types; /* RSS_TYPE_ */
+};
+
#endif /* __BSD_VISIBLE */
#ifdef _KERNEL
Modified: trunk/sys/net/if_arc.h
===================================================================
--- trunk/sys/net/if_arc.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_arc.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/* $NetBSD: if_arc.h,v 1.13 1999/11/19 20:41:19 thorpej Exp $ */
-/* $FreeBSD: stable/9/sys/net/if_arc.h 194581 2009-06-21 10:29:31Z rdivacky $ */
+/* $FreeBSD: stable/10/sys/net/if_arc.h 249925 2013-04-26 12:50:32Z glebius $ */
/*-
* Copyright (c) 1982, 1986, 1993
@@ -134,7 +134,7 @@
int arc_isphds(u_int8_t);
void arc_input(struct ifnet *, struct mbuf *);
int arc_output(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+ const struct sockaddr *, struct route *);
int arc_ioctl(struct ifnet *, u_long, caddr_t);
void arc_frag_init(struct ifnet *);
Modified: trunk/sys/net/if_arcsubr.c
===================================================================
--- trunk/sys/net/if_arcsubr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_arcsubr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/* $NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $ */
-/* $FreeBSD: stable/9/sys/net/if_arcsubr.c 249132 2013-04-05 08:22:11Z mav $ */
+/* $FreeBSD: stable/10/sys/net/if_arcsubr.c 332160 2018-04-07 00:04:28Z brooks $ */
/*-
* Copyright (c) 1994, 1995 Ignatios Souvatzis
@@ -93,8 +93,8 @@
#define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp))
#define senderr(e) { error = (e); goto bad;}
-#define SIN(s) ((struct sockaddr_in *)s)
-#define SIPX(s) ((struct sockaddr_ipx *)s)
+#define SIN(s) ((const struct sockaddr_in *)(s))
+#define SIPX(s) ((const struct sockaddr_ipx *)(s))
/*
* ARCnet output routine.
@@ -102,7 +102,7 @@
* Assumes that ifp is actually pointer to arccom structure.
*/
int
-arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
struct arc_header *ah;
@@ -187,8 +187,11 @@
#endif
case AF_UNSPEC:
+ {
+ const struct arc_header *ah;
+
loop_copy = -1;
- ah = (struct arc_header *)dst->sa_data;
+ ah = (const struct arc_header *)dst->sa_data;
adst = ah->arc_dhost;
atype = ah->arc_type;
@@ -208,7 +211,7 @@
#endif
}
break;
-
+ }
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
senderr(EAFNOSUPPORT);
@@ -215,7 +218,7 @@
}
isphds = arc_isphds(atype);
- M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_DONTWAIT);
+ M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT);
if (m == 0)
senderr(ENOBUFS);
ah = mtod(m, struct arc_header *);
@@ -295,13 +298,13 @@
/* split out next fragment and return it */
if (ac->sflag < ac->fsflag) {
/* we CAN'T have short packets here */
- ac->curr_frag = m_split(m, ARC_MAX_DATA, M_DONTWAIT);
+ ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT);
if (ac->curr_frag == 0) {
m_freem(m);
return 0;
}
- M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
+ M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
if (m == 0) {
m_freem(ac->curr_frag);
ac->curr_frag = 0;
@@ -320,7 +323,7 @@
ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) {
ac->curr_frag = 0;
- M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_DONTWAIT);
+ M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT);
if (m == 0)
return 0;
@@ -333,7 +336,7 @@
} else {
ac->curr_frag = 0;
- M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
+ M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
if (m == 0)
return 0;
@@ -639,11 +642,7 @@
ifp->if_resolvemulti = arc_resolvemulti;
if (ifp->if_baudrate == 0)
ifp->if_baudrate = 2500000;
-#if __FreeBSD_version < 500000
- ifa = ifnet_addrs[ifp->if_index - 1];
-#else
ifa = ifp->if_addr;
-#endif
KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
sdl->sdl_type = IFT_ARCNET;
@@ -717,12 +716,7 @@
break;
case SIOCGIFADDR:
- {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *) &ifr->ifr_data;
- *(u_int8_t *)sa->sa_data = ARC_LLADDR(ifp);
- }
+ ifr->ifr_addr.sa_data[0] = ARC_LLADDR(ifp);
break;
case SIOCADDMULTI:
Modified: trunk/sys/net/if_arp.h
===================================================================
--- trunk/sys/net/if_arp.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_arp.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if_arp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_arp.h 219819 2011-03-21 09:40:01Z jeff $
+ * $FreeBSD: stable/10/sys/net/if_arp.h 253084 2013-07-09 09:50:15Z ae $
*/
#ifndef _NET_IF_ARP_H_
@@ -114,27 +114,35 @@
struct arpstat {
/* Normal things that happen: */
- u_long txrequests; /* # of ARP requests sent by this host. */
- u_long txreplies; /* # of ARP replies sent by this host. */
- u_long rxrequests; /* # of ARP requests received by this host. */
- u_long rxreplies; /* # of ARP replies received by this host. */
- u_long received; /* # of ARP packets received by this host. */
+ uint64_t txrequests; /* # of ARP requests sent by this host. */
+ uint64_t txreplies; /* # of ARP replies sent by this host. */
+ uint64_t rxrequests; /* # of ARP requests received by this host. */
+ uint64_t rxreplies; /* # of ARP replies received by this host. */
+ uint64_t received; /* # of ARP packets received by this host. */
- u_long arp_spares[4]; /* For either the upper or lower half. */
+ uint64_t arp_spares[4]; /* For either the upper or lower half. */
/* Abnormal event and error counting: */
- u_long dropped; /* # of packets dropped waiting for a reply. */
- u_long timeouts; /* # of times with entries removed */
+ uint64_t dropped; /* # of packets dropped waiting for a reply. */
+ uint64_t timeouts; /* # of times with entries removed */
/* due to timeout. */
- u_long dupips; /* # of duplicate IPs detected. */
+ uint64_t dupips; /* # of duplicate IPs detected. */
};
+#ifdef _KERNEL
+#include <sys/counter.h>
+#include <net/vnet.h>
+
+VNET_PCPUSTAT_DECLARE(struct arpstat, arpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define ARPSTAT_ADD(name, val) V_arpstat.name += (val)
-#define ARPSTAT_SUB(name, val) V_arpstat.name -= (val)
+#define ARPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct arpstat, arpstat, name, (val))
+#define ARPSTAT_SUB(name, val) ARPSTAT_ADD(name, -(val))
#define ARPSTAT_INC(name) ARPSTAT_ADD(name, 1)
#define ARPSTAT_DEC(name) ARPSTAT_SUB(name, 1)
+#endif /* _KERNEL */
+
#endif /* !_NET_IF_ARP_H_ */
Modified: trunk/sys/net/if_atm.h
===================================================================
--- trunk/sys/net/if_atm.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_atm.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/* $NetBSD: if_atm.h,v 1.7 1996/11/09 23:02:27 chuck Exp $ */
-/* $FreeBSD: stable/9/sys/net/if_atm.h 191148 2009-04-16 20:30:28Z kmacy $ */
+/* $FreeBSD: stable/10/sys/net/if_atm.h 249925 2013-04-26 12:50:32Z glebius $ */
/*-
*
@@ -293,7 +293,7 @@
void atm_ifdetach(struct ifnet *);
void atm_input(struct ifnet *, struct atm_pseudohdr *,
struct mbuf *, void *);
-int atm_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+int atm_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
struct atmio_vcctable *atm_getvccs(struct atmio_vcc **, u_int, u_int,
struct mtx *, int);
Modified: trunk/sys/net/if_atmsubr.c
===================================================================
--- trunk/sys/net/if_atmsubr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_atmsubr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_atmsubr.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_atmsubr.c 249925 2013-04-26 12:50:32Z glebius $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -122,7 +122,7 @@
* ro->ro_rt must also be NULL.
*/
int
-atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+atm_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
struct route *ro)
{
u_int16_t etype = 0; /* if using LLC/SNAP */
@@ -130,7 +130,7 @@
struct atm_pseudohdr atmdst, *ad;
struct mbuf *m = m0;
struct atmllc *atmllc;
- struct atmllc *llc_hdr = NULL;
+ const struct atmllc *llc_hdr = NULL;
u_int32_t atm_flags;
#ifdef MAC
@@ -174,7 +174,7 @@
* (atm pseudo header (4) + LLC/SNAP (8))
*/
bcopy(dst->sa_data, &atmdst, sizeof(atmdst));
- llc_hdr = (struct atmllc *)(dst->sa_data +
+ llc_hdr = (const struct atmllc *)(dst->sa_data +
sizeof(atmdst));
break;
@@ -191,7 +191,7 @@
atm_flags = ATM_PH_FLAGS(&atmdst);
if (atm_flags & ATM_PH_LLCSNAP)
sz += 8; /* sizeof snap == 8 */
- M_PREPEND(m, sz, M_DONTWAIT);
+ M_PREPEND(m, sz, M_NOWAIT);
if (m == 0)
senderr(ENOBUFS);
ad = mtod(m, struct atm_pseudohdr *);
Modified: trunk/sys/net/if_bridge.c
===================================================================
--- trunk/sys/net/if_bridge.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_bridge.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -76,7 +76,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_bridge.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_bridge.c 324116 2017-09-30 10:16:15Z kp $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -101,7 +101,6 @@
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
-#include <sys/rwlock.h>
#include <net/bpf.h>
#include <net/if.h>
@@ -120,6 +119,7 @@
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
+#include <netinet6/in6_ifattach.h>
#endif
#if defined(INET) || defined(INET6)
#include <netinet/ip_carp.h>
@@ -132,8 +132,6 @@
#include <net/if_vlan_var.h>
#include <net/route.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
/*
* Size of the route hash table. Must be a power of two.
@@ -168,7 +166,8 @@
/*
* List of capabilities to possibly mask on the member interface.
*/
-#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
+ IFCAP_TXCSUM_IPV6)
/*
* List of capabilities to strip
@@ -246,11 +245,12 @@
static void bridge_init(void *);
static void bridge_dummynet(struct mbuf *, struct ifnet *);
static void bridge_stop(struct ifnet *, int);
-static void bridge_start(struct ifnet *);
+static int bridge_transmit(struct ifnet *, struct mbuf *);
+static void bridge_qflush(struct ifnet *);
static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
-static void bridge_enqueue(struct bridge_softc *, struct ifnet *,
+static int bridge_enqueue(struct bridge_softc *, struct ifnet *,
struct mbuf *);
static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
@@ -273,7 +273,7 @@
static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
uint16_t);
-static int bridge_rtable_init(struct bridge_softc *);
+static void bridge_rtable_init(struct bridge_softc *);
static void bridge_rtable_fini(struct bridge_softc *);
static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
@@ -332,7 +332,7 @@
#ifdef INET6
static int bridge_ip6_checkbasic(struct mbuf **mp);
#endif /* INET6 */
-static int bridge_fragment(struct ifnet *, struct mbuf *,
+static int bridge_fragment(struct ifnet *, struct mbuf **mp,
struct ether_header *, int, struct llc *);
static void bridge_linkstate(struct ifnet *ifp);
static void bridge_linkcheck(struct bridge_softc *sc);
@@ -384,6 +384,12 @@
&bridge_inherit_mac, 0,
"Inherit MAC address from the first bridge member");
+static VNET_DEFINE(int, allow_llz_overlap) = 0;
+#define V_allow_llz_overlap VNET(allow_llz_overlap)
+SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW,
+ &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope "
+ "zones of a bridge interface and the member interfaces");
+
struct bridge_control {
int (*bc_func)(struct bridge_softc *, void *);
int bc_argsize;
@@ -484,7 +490,8 @@
LIST_HEAD(, bridge_softc) bridge_list;
-IFC_SIMPLE_DECLARE(bridge, 0);
+static struct if_clone *bridge_cloner;
+static const char bridge_name[] = "bridge";
static int
bridge_modevent(module_t mod, int type, void *data)
@@ -493,7 +500,8 @@
switch (type) {
case MOD_LOAD:
mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF);
- if_clone_attach(&bridge_cloner);
+ bridge_cloner = if_clone_simple(bridge_name,
+ bridge_clone_create, bridge_clone_destroy, 0);
bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
@@ -509,7 +517,7 @@
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
bridge_detach_cookie);
- if_clone_detach(&bridge_cloner);
+ if_clone_detach(bridge_cloner);
uma_zdestroy(bridge_rtnode_zone);
bridge_input_p = NULL;
bridge_output_p = NULL;
@@ -530,10 +538,11 @@
};
DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_bridge, 1);
MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
/*
- * handler for net.link.bridge.pfil_ipfw
+ * handler for net.link.bridge.ipfw
*/
static int
sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
@@ -598,15 +607,13 @@
LIST_INIT(&sc->sc_spanlist);
ifp->if_softc = sc;
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, bridge_name, unit);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = bridge_ioctl;
- ifp->if_start = bridge_start;
+ ifp->if_transmit = bridge_transmit;
+ ifp->if_qflush = bridge_qflush;
ifp->if_init = bridge_init;
ifp->if_type = IFT_BRIDGE;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
- IFQ_SET_READY(&ifp->if_snd);
/*
* Generate an ethernet address with a locally administered address.
@@ -618,7 +625,7 @@
*/
fb = 0;
getcredhostid(curthread->td_ucred, &hostid);
- for (retry = 1; retry != 0;) {
+ do {
if (fb || hostid == 0) {
arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
@@ -638,11 +645,13 @@
LIST_FOREACH(sc2, &bridge_list, sc_list) {
bifp = sc2->sc_ifp;
if (memcmp(sc->sc_defaddr,
- IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0)
+ IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
retry = 1;
+ break;
+ }
}
mtx_unlock(&bridge_list_mtx);
- }
+ } while (retry == 1);
bstp_attach(&sc->sc_stp, &bridge_ops);
ether_ifattach(ifp, sc->sc_defaddr);
@@ -690,7 +699,7 @@
bstp_detach(&sc->sc_stp);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
/* Tear down the routing table. */
bridge_rtable_fini(sc);
@@ -853,6 +862,7 @@
mask &= bif->bif_savedcaps;
}
+ BRIDGE_XLOCK(sc);
LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
enabled = bif->bif_ifp->if_capenable;
enabled &= ~BRIDGE_IFCAPS_STRIP;
@@ -859,8 +869,11 @@
/* strip off mask bits and enable them again if allowed */
enabled &= ~BRIDGE_IFCAPS_MASK;
enabled |= mask;
+ BRIDGE_UNLOCK(sc);
bridge_set_ifcap(sc, bif, enabled);
+ BRIDGE_LOCK(sc);
}
+ BRIDGE_XDROP(sc);
}
@@ -871,6 +884,8 @@
struct ifreq ifr;
int error;
+ BRIDGE_UNLOCK_ASSERT(sc);
+
bzero(&ifr, sizeof(ifr));
ifr.ifr_reqcap = set;
@@ -978,9 +993,12 @@
case IFT_ETHER:
case IFT_L2VLAN:
/*
- * Take the interface out of promiscuous mode.
+ * Take the interface out of promiscuous mode, but only
+ * if it was promiscuous in the first place. It might
+ * not be if we're in the bridge_ioctl_add() error path.
*/
- (void) ifpromisc(ifs, 0);
+ if (ifs->if_flags & IFF_PROMISC)
+ (void) ifpromisc(ifs, 0);
break;
case IFT_GIF:
@@ -1042,14 +1060,6 @@
if (ifs->if_bridge != NULL)
return (EBUSY);
- bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
- if (bif == NULL)
- return (ENOMEM);
-
- bif->bif_ifp = ifs;
- bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
- bif->bif_savedcaps = ifs->if_capenable;
-
switch (ifs->if_type) {
case IFT_ETHER:
case IFT_L2VLAN:
@@ -1057,10 +1067,53 @@
/* permitted interface types */
break;
default:
- error = EINVAL;
- goto out;
+ return (EINVAL);
}
+#ifdef INET6
+ /*
+ * Two valid inet6 addresses with link-local scope must not be
+ * on the parent interface and the member interfaces at the
+ * same time. This restriction is needed to prevent violation
+ * of link-local scope zone. Attempts to add a member
+ * interface which has inet6 addresses when the parent has
+ * inet6 triggers removal of all inet6 addresses on the member
+ * interface.
+ */
+
+ /* Check if the parent interface has a link-local scope addr. */
+ if (V_allow_llz_overlap == 0 &&
+ in6ifa_llaonifp(sc->sc_ifp) != NULL) {
+ /*
+ * If any, remove all inet6 addresses from the member
+ * interfaces.
+ */
+ BRIDGE_XLOCK(sc);
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if (in6ifa_llaonifp(bif->bif_ifp)) {
+ BRIDGE_UNLOCK(sc);
+ in6_ifdetach(bif->bif_ifp);
+ BRIDGE_LOCK(sc);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ bif->bif_ifp->if_xname);
+ }
+ }
+ BRIDGE_XDROP(sc);
+ if (in6ifa_llaonifp(ifs)) {
+ BRIDGE_UNLOCK(sc);
+ in6_ifdetach(ifs);
+ BRIDGE_LOCK(sc);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ ifs->if_xname);
+ }
+ }
+#endif
/* Allow the first Ethernet member to define the MTU */
if (LIST_EMPTY(&sc->sc_iflist))
sc->sc_ifp->if_mtu = ifs->if_mtu;
@@ -1067,10 +1120,17 @@
else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n",
ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu);
- error = EINVAL;
- goto out;
+ return (EINVAL);
}
+ bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (bif == NULL)
+ return (ENOMEM);
+
+ bif->bif_ifp = ifs;
+ bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+ bif->bif_savedcaps = ifs->if_capenable;
+
/*
* Assign the interface's MAC address to the bridge if it's the first
* member and the MAC address of the bridge has not been changed from
@@ -1105,13 +1165,9 @@
BRIDGE_LOCK(sc);
break;
}
+
if (error)
bridge_delete_member(sc, bif, 0);
-out:
- if (error) {
- if (bif != NULL)
- free(bif, M_DEVBUF);
- }
return (error);
}
@@ -1784,7 +1840,7 @@
* Enqueue a packet on a bridge member interface.
*
*/
-static void
+static int
bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
{
int len, err = 0;
@@ -1791,13 +1847,12 @@
short mflags;
struct mbuf *m0;
- len = m->m_pkthdr.len;
- mflags = m->m_flags;
-
/* We may be sending a fragment so traverse the mbuf */
for (; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
+ len = m->m_pkthdr.len;
+ mflags = m->m_flags;
/*
* If underlying interface can not do VLAN tag insertion itself
@@ -1815,18 +1870,20 @@
m->m_flags &= ~M_VLANTAG;
}
+ M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */
if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
m_freem(m0);
+ sc->sc_ifp->if_oerrors++;
break;
}
- }
- if (err == 0) {
sc->sc_ifp->if_opackets++;
sc->sc_ifp->if_obytes += len;
if (mflags & M_MCAST)
sc->sc_ifp->if_omcasts++;
}
+
+ return (err);
}
/*
@@ -1952,7 +2009,7 @@
used = 1;
mc = m;
} else {
- mc = m_copypacket(m, M_DONTWAIT);
+ mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
sc->sc_ifp->if_oerrors++;
continue;
@@ -1985,47 +2042,45 @@
}
/*
- * bridge_start:
+ * bridge_transmit:
*
- * Start output on a bridge.
+ * Do output on a bridge.
*
*/
-static void
-bridge_start(struct ifnet *ifp)
+static int
+bridge_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct bridge_softc *sc;
- struct mbuf *m;
struct ether_header *eh;
struct ifnet *dst_if;
+ int error = 0;
sc = ifp->if_softc;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- for (;;) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
- ETHER_BPF_MTAP(ifp, m);
+ ETHER_BPF_MTAP(ifp, m);
- eh = mtod(m, struct ether_header *);
- dst_if = NULL;
+ eh = mtod(m, struct ether_header *);
- BRIDGE_LOCK(sc);
- if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
- dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
- }
+ BRIDGE_LOCK(sc);
+ if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) &&
+ (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) {
+ BRIDGE_UNLOCK(sc);
+ error = bridge_enqueue(sc, dst_if, m);
+ } else
+ bridge_broadcast(sc, ifp, m, 0);
- if (dst_if == NULL)
- bridge_broadcast(sc, ifp, m, 0);
- else {
- BRIDGE_UNLOCK(sc);
- bridge_enqueue(sc, dst_if, m);
- }
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ return (error);
}
/*
+ * The ifp->if_qflush entry point for if_bridge(4) is no-op.
+ */
+static void
+bridge_qflush(struct ifnet *ifp __unused)
+{
+}
+
+/*
* bridge_forward:
*
* The forwarding function of the bridge.
@@ -2243,7 +2298,7 @@
* for bridge processing; return the original packet for
* local processing.
*/
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc == NULL) {
BRIDGE_UNLOCK(sc);
return (m);
@@ -2260,7 +2315,7 @@
*/
KASSERT(bifp->if_bridge == NULL,
("loop created in bridge_input"));
- mc2 = m_dup(m, M_DONTWAIT);
+ mc2 = m_dup(m, M_NOWAIT);
if (mc2 != NULL) {
/* Keep the layer3 header aligned */
int i = min(mc2->m_pkthdr.len, max_protohdr);
@@ -2310,6 +2365,7 @@
if ((iface)->if_type == IFT_BRIDGE) { \
ETHER_BPF_MTAP(iface, m); \
iface->if_ipackets++; \
+ iface->if_ibytes += m->m_pkthdr.len; \
/* Filter on the physical interface. */ \
if (pfil_local_phys && \
(PFIL_HOOKED(&V_inet_pfil_hook) \
@@ -2319,6 +2375,7 @@
BRIDGE_UNLOCK(sc); \
return (NULL); \
} \
+ eh = mtod(m, struct ether_header *); \
} \
} \
if (bif->bif_flags & IFBIF_LEARNING) { \
@@ -2436,7 +2493,7 @@
mc = m;
used = 1;
} else {
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc == NULL) {
sc->sc_ifp->if_oerrors++;
continue;
@@ -2499,7 +2556,7 @@
if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
continue;
- mc = m_copypacket(m, M_DONTWAIT);
+ mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
sc->sc_ifp->if_oerrors++;
continue;
@@ -2744,24 +2801,19 @@
*
* Initialize the route table for this bridge.
*/
-static int
+static void
bridge_rtable_init(struct bridge_softc *sc)
{
int i;
sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
- M_DEVBUF, M_NOWAIT);
- if (sc->sc_rthash == NULL)
- return (ENOMEM);
+ M_DEVBUF, M_WAITOK);
for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
LIST_INIT(&sc->sc_rthash[i]);
sc->sc_rthash_key = arc4random();
-
LIST_INIT(&sc->sc_rtlist);
-
- return (0);
}
/*
@@ -2985,7 +3037,6 @@
{
int snap, error, i, hlen;
struct ether_header *eh1, eh2;
- struct ip_fw_args args;
struct ip *ip;
struct llc llc1;
u_int16_t ether_type;
@@ -3059,6 +3110,16 @@
goto bad;
}
+ /* Run the packet through pfil before stripping link headers */
+ if (PFIL_HOOKED(&V_link_pfil_hook) && pfil_ipfw != 0 &&
+ dir == PFIL_OUT && ifp != NULL) {
+
+ error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL);
+
+ if (*mp == NULL || error != 0) /* packet consumed by filter */
+ return (error);
+ }
+
/* Strip off the Ethernet header and keep a copy. */
m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
m_adj(*mp, ETHER_HDR_LEN);
@@ -3089,63 +3150,6 @@
goto bad;
}
- /* XXX this section is also in if_ethersubr.c */
- // XXX PFIL_OUT or DIR_OUT ?
- if (V_ip_fw_chk_ptr && pfil_ipfw != 0 &&
- dir == PFIL_OUT && ifp != NULL) {
- struct m_tag *mtag;
-
- error = -1;
- /* fetch the start point from existing tags, if any */
- mtag = m_tag_locate(*mp, MTAG_IPFW_RULE, 0, NULL);
- if (mtag == NULL) {
- args.rule.slot = 0;
- } else {
- struct ipfw_rule_ref *r;
-
- /* XXX can we free the tag after use ? */
- mtag->m_tag_id = PACKET_TAG_NONE;
- r = (struct ipfw_rule_ref *)(mtag + 1);
- /* packet already partially processed ? */
- if (r->info & IPFW_ONEPASS)
- goto ipfwpass;
- args.rule = *r;
- }
-
- args.m = *mp;
- args.oif = ifp;
- args.next_hop = NULL;
- args.next_hop6 = NULL;
- args.eh = &eh2;
- args.inp = NULL; /* used by ipfw uid/gid/jail rules */
- i = V_ip_fw_chk_ptr(&args);
- *mp = args.m;
-
- if (*mp == NULL)
- return (error);
-
- if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
-
- /* put the Ethernet header back on */
- M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
- if (*mp == NULL)
- return (error);
- bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
-
- /*
- * Pass the pkt to dummynet, which consumes it. The
- * packet will return to us via bridge_dummynet().
- */
- args.oif = ifp;
- ip_dn_io_ptr(mp, DIR_FWD | PROTO_IFB, &args);
- return (error);
- }
-
- if (i != IP_FW_PASS) /* drop */
- goto bad;
- }
-
-ipfwpass:
error = 0;
/*
@@ -3154,15 +3158,6 @@
switch (ether_type) {
case ETHERTYPE_IP:
/*
- * before calling the firewall, swap fields the same as
- * IP does. here we assume the header is contiguous
- */
- ip = mtod(*mp, struct ip *);
-
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
- /*
* Run pfil on the member interface and the bridge, both can
* be skipped by clearing pfil_member or pfil_bridge.
*
@@ -3191,16 +3186,18 @@
break;
/* check if we need to fragment the packet */
+ /* bridge_fragment generates a mbuf chain of packets */
+ /* that already include eth headers */
if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
i = (*mp)->m_pkthdr.len;
if (i > ifp->if_mtu) {
- error = bridge_fragment(ifp, *mp, &eh2, snap,
+ error = bridge_fragment(ifp, mp, &eh2, snap,
&llc1);
return (error);
}
}
- /* Recalculate the ip checksum and restore byte ordering */
+ /* Recalculate the ip checksum. */
ip = mtod(*mp, struct ip *);
hlen = ip->ip_hl << 2;
if (hlen < sizeof(struct ip))
@@ -3212,8 +3209,6 @@
if (ip == NULL)
goto bad;
}
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);
@@ -3258,13 +3253,13 @@
* Finally, put everything back the way it was and return
*/
if (snap) {
- M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
+ M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
if (*mp == NULL)
return (error);
bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
}
- M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
if (*mp == NULL)
return (error);
bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
@@ -3435,56 +3430,77 @@
/*
* bridge_fragment:
*
- * Return a fragmented mbuf chain.
+ * Fragment mbuf chain in multiple packets and prepend ethernet header.
*/
static int
-bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
+bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh,
int snap, struct llc *llc)
{
- struct mbuf *m0;
+ struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL;
struct ip *ip;
int error = -1;
if (m->m_len < sizeof(struct ip) &&
(m = m_pullup(m, sizeof(struct ip))) == NULL)
- goto out;
+ goto dropit;
ip = mtod(m, struct ip *);
- error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
- CSUM_DELAY_IP);
+ m->m_pkthdr.csum_flags |= CSUM_IP;
+ error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist);
if (error)
- goto out;
+ goto dropit;
- /* walk the chain and re-add the Ethernet header */
- for (m0 = m; m0; m0 = m0->m_nextpkt) {
- if (error == 0) {
- if (snap) {
- M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
- if (m0 == NULL) {
- error = ENOBUFS;
- continue;
- }
- bcopy(llc, mtod(m0, caddr_t),
- sizeof(struct llc));
- }
- M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
- if (m0 == NULL) {
+ /*
+ * Walk the chain and re-add the Ethernet header for
+ * each mbuf packet.
+ */
+ for (mcur = m; mcur; mcur = mcur->m_nextpkt) {
+ nextpkt = mcur->m_nextpkt;
+ mcur->m_nextpkt = NULL;
+ if (snap) {
+ M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT);
+ if (mcur == NULL) {
error = ENOBUFS;
- continue;
+ if (mprev != NULL)
+ mprev->m_nextpkt = nextpkt;
+ goto dropit;
}
- bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
- } else
- m_freem(m);
+ bcopy(llc, mtod(mcur, caddr_t),sizeof(struct llc));
+ }
+
+ M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT);
+ if (mcur == NULL) {
+ error = ENOBUFS;
+ if (mprev != NULL)
+ mprev->m_nextpkt = nextpkt;
+ goto dropit;
+ }
+ bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN);
+
+ /*
+ * The previous two M_PREPEND could have inserted one or two
+ * mbufs in front so we have to update the previous packet's
+ * m_nextpkt.
+ */
+ mcur->m_nextpkt = nextpkt;
+ if (mprev != NULL)
+ mprev->m_nextpkt = mcur;
+ else {
+ /* The first mbuf in the original chain needs to be
+ * updated. */
+ *mp = mcur;
+ }
+ mprev = mcur;
}
- if (error == 0)
- KMOD_IPSTAT_INC(ips_fragmented);
-
+ KMOD_IPSTAT_INC(ips_fragmented);
return (error);
-out:
- if (m != NULL)
- m_freem(m);
+dropit:
+	for (mcur = *mp; mcur; mcur = m) {	/* dropping the full packet chain */
+ m = mcur->m_nextpkt;
+ m_freem(mcur);
+ }
return (error);
}
Modified: trunk/sys/net/if_bridgevar.h
===================================================================
--- trunk/sys/net/if_bridgevar.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_bridgevar.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -68,7 +68,7 @@
*
* OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
*
- * $FreeBSD: stable/9/sys/net/if_bridgevar.h 173320 2007-11-04 08:32:27Z thompsa $
+ * $FreeBSD: stable/10/sys/net/if_bridgevar.h 313066 2017-02-01 21:44:50Z kp $
*/
/*
@@ -281,6 +281,7 @@
#define BRIDGE_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx)
#define BRIDGE_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
+#define BRIDGE_UNLOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED)
#define BRIDGE_LOCK2REF(_sc, _err) do { \
mtx_assert(&(_sc)->sc_mtx, MA_OWNED); \
if ((_sc)->sc_iflist_xcnt > 0) \
Modified: trunk/sys/net/if_clone.c
===================================================================
--- trunk/sys/net/if_clone.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_clone.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,6 @@
/* $MidnightBSD$ */
/*-
+ * Copyright (c) 2012 Gleb Smirnoff <glebius at FreeBSD.org>
* Copyright (c) 1980, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -28,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)if.c 8.5 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/if_clone.c 215701 2010-11-22 19:32:54Z dim $
+ * $FreeBSD: stable/10/sys/net/if_clone.c 324813 2017-10-21 10:48:06Z avos $
*/
#include <sys/param.h>
@@ -43,19 +44,65 @@
#include <net/if.h>
#include <net/if_clone.h>
-#if 0
-#include <net/if_dl.h>
-#endif
-#include <net/if_types.h>
#include <net/if_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/vnet.h>
+/* Current IF_MAXUNIT expands maximum to 5 characters. */
+#define IFCLOSIZ (IFNAMSIZ - 5)
+
+/*
+ * Structure describing a `cloning' interface.
+ *
+ * List of locks
+ * (c) const until freeing
+ * (d) driver specific data, may need external protection.
+ * (e) locked by if_cloners_mtx
+ * (i) locked by ifc_mtx mtx
+ */
+struct if_clone {
+ char ifc_name[IFCLOSIZ]; /* (c) Name of device, e.g. `gif' */
+ struct unrhdr *ifc_unrhdr; /* (c) alloc_unr(9) header */
+ int ifc_maxunit; /* (c) maximum unit number */
+ long ifc_refcnt; /* (i) Reference count. */
+ LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
+ struct mtx ifc_mtx; /* Mutex to protect members. */
+
+ enum { SIMPLE, ADVANCED } ifc_type; /* (c) */
+
+ /* (c) Driver specific cloning functions. Called with no locks held. */
+ union {
+ struct { /* advanced cloner */
+ ifc_match_t *_ifc_match;
+ ifc_create_t *_ifc_create;
+ ifc_destroy_t *_ifc_destroy;
+ } A;
+ struct { /* simple cloner */
+ ifcs_create_t *_ifcs_create;
+ ifcs_destroy_t *_ifcs_destroy;
+ int _ifcs_minifs; /* minimum ifs */
+
+ } S;
+ } U;
+#define ifc_match U.A._ifc_match
+#define ifc_create U.A._ifc_create
+#define ifc_destroy U.A._ifc_destroy
+#define ifcs_create U.S._ifcs_create
+#define ifcs_destroy U.S._ifcs_destroy
+#define ifcs_minifs U.S._ifcs_minifs
+
+ LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
+};
+
static void if_clone_free(struct if_clone *ifc);
static int if_clone_createif(struct if_clone *ifc, char *name, size_t len,
caddr_t params);
+static int ifc_simple_match(struct if_clone *, const char *);
+static int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
+static int ifc_simple_destroy(struct if_clone *, struct ifnet *);
+
static struct mtx if_cloners_mtx;
static VNET_DEFINE(int, if_cloners_count);
VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners);
@@ -139,18 +186,25 @@
/* Try to find an applicable cloner for this request */
IF_CLONERS_LOCK();
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name)) {
- break;
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
}
- }
#ifdef VIMAGE
if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
CURVNET_SET_QUIET(vnet0);
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name))
- break;
- }
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
CURVNET_RESTORE();
}
#endif
@@ -174,7 +228,10 @@
if (ifunit(name) != NULL)
return (EEXIST);
- err = (*ifc->ifc_create)(ifc, name, len, params);
+ if (ifc->ifc_type == SIMPLE)
+ err = ifc_simple_create(ifc, name, len, params);
+ else
+ err = (*ifc->ifc_create)(ifc, name, len, params);
if (!err) {
ifp = ifunit(name);
@@ -215,10 +272,14 @@
#ifdef VIMAGE
if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
CURVNET_SET_QUIET(vnet0);
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name))
- break;
- }
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
CURVNET_RESTORE();
}
#endif
@@ -242,7 +303,7 @@
int err;
struct ifnet *ifcifp;
- if (ifc->ifc_destroy == NULL)
+ if (ifc->ifc_type == ADVANCED && ifc->ifc_destroy == NULL)
return(EOPNOTSUPP);
/*
@@ -267,7 +328,10 @@
if_delgroup(ifp, ifc->ifc_name);
- err = (*ifc->ifc_destroy)(ifc, ifp);
+ if (ifc->ifc_type == SIMPLE)
+ err = ifc_simple_destroy(ifc, ifp);
+ else
+ err = (*ifc->ifc_destroy)(ifc, ifp);
if (err != 0) {
if_addgroup(ifp, ifc->ifc_name);
@@ -280,38 +344,99 @@
return (err);
}
-/*
- * Register a network interface cloner.
- */
-void
-if_clone_attach(struct if_clone *ifc)
+static struct if_clone *
+if_clone_alloc(const char *name, int maxunit)
{
- int len, maxclone;
+ struct if_clone *ifc;
- /*
- * Compute bitmap size and allocate it.
- */
- maxclone = ifc->ifc_maxunit + 1;
- len = maxclone >> 3;
- if ((len << 3) < maxclone)
- len++;
- ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
- ifc->ifc_bmlen = len;
+ KASSERT(name != NULL, ("%s: no name\n", __func__));
+
+ ifc = malloc(sizeof(struct if_clone), M_CLONE, M_WAITOK | M_ZERO);
+ strncpy(ifc->ifc_name, name, IFCLOSIZ-1);
IF_CLONE_LOCK_INIT(ifc);
IF_CLONE_ADDREF(ifc);
+ ifc->ifc_maxunit = maxunit ? maxunit : IF_MAXUNIT;
+ ifc->ifc_unrhdr = new_unrhdr(0, ifc->ifc_maxunit, &ifc->ifc_mtx);
+ LIST_INIT(&ifc->ifc_iflist);
+ return (ifc);
+}
+
+static int
+if_clone_attach(struct if_clone *ifc)
+{
+ struct if_clone *ifc1;
+
IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc1, &V_if_cloners, ifc_list)
+ if (strcmp(ifc->ifc_name, ifc1->ifc_name) == 0) {
+ IF_CLONERS_UNLOCK();
+ IF_CLONE_REMREF(ifc);
+ return (EEXIST);
+ }
LIST_INSERT_HEAD(&V_if_cloners, ifc, ifc_list);
V_if_cloners_count++;
IF_CLONERS_UNLOCK();
- LIST_INIT(&ifc->ifc_iflist);
+ return (0);
+}
- if (ifc->ifc_attach != NULL)
- (*ifc->ifc_attach)(ifc);
+struct if_clone *
+if_clone_advanced(const char *name, u_int maxunit, ifc_match_t match,
+ ifc_create_t create, ifc_destroy_t destroy)
+{
+ struct if_clone *ifc;
+
+ ifc = if_clone_alloc(name, maxunit);
+ ifc->ifc_type = ADVANCED;
+ ifc->ifc_match = match;
+ ifc->ifc_create = create;
+ ifc->ifc_destroy = destroy;
+
+ if (if_clone_attach(ifc) != 0) {
+ if_clone_free(ifc);
+ return (NULL);
+ }
+
EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+ return (ifc);
}
+struct if_clone *
+if_clone_simple(const char *name, ifcs_create_t create, ifcs_destroy_t destroy,
+ u_int minifs)
+{
+ struct if_clone *ifc;
+ u_int unit;
+
+ ifc = if_clone_alloc(name, 0);
+ ifc->ifc_type = SIMPLE;
+ ifc->ifcs_create = create;
+ ifc->ifcs_destroy = destroy;
+ ifc->ifcs_minifs = minifs;
+
+ if (if_clone_attach(ifc) != 0) {
+ if_clone_free(ifc);
+ return (NULL);
+ }
+
+ for (unit = 0; unit < minifs; unit++) {
+ char name[IFNAMSIZ];
+ int error;
+
+ snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
+ error = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
+ KASSERT(error == 0,
+ ("%s: failed to create required interface %s",
+ __func__, name));
+ }
+
+ EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+ return (ifc);
+}
+
/*
* Unregister a network interface cloner.
*/
@@ -318,7 +443,6 @@
void
if_clone_detach(struct if_clone *ifc)
{
- struct ifc_simple_data *ifcs = ifc->ifc_data;
IF_CLONERS_LOCK();
LIST_REMOVE(ifc, ifc_list);
@@ -326,8 +450,8 @@
IF_CLONERS_UNLOCK();
/* Allow all simples to be destroyed */
- if (ifc->ifc_attach == ifc_simple_attach)
- ifcs->ifcs_minifs = 0;
+ if (ifc->ifc_type == SIMPLE)
+ ifc->ifcs_minifs = 0;
/* destroy all interfaces for this cloner */
while (!LIST_EMPTY(&ifc->ifc_iflist))
@@ -339,16 +463,13 @@
static void
if_clone_free(struct if_clone *ifc)
{
- for (int bytoff = 0; bytoff < ifc->ifc_bmlen; bytoff++) {
- KASSERT(ifc->ifc_units[bytoff] == 0x00,
- ("ifc_units[%d] is not empty", bytoff));
- }
KASSERT(LIST_EMPTY(&ifc->ifc_iflist),
("%s: ifc_iflist not empty", __func__));
IF_CLONE_LOCK_DESTROY(ifc);
- free(ifc->ifc_units, M_CLONE);
+ delete_unrhdr(ifc->ifc_unrhdr);
+ free(ifc, M_CLONE);
}
/*
@@ -405,6 +526,49 @@
}
/*
+ * if_clone_findifc() looks up ifnet from the current
+ * cloner list, and returns ifc if found. Note that ifc_refcnt
+ * is incremented.
+ */
+struct if_clone *
+if_clone_findifc(struct ifnet *ifp)
+{
+ struct if_clone *ifc, *ifc0;
+ struct ifnet *ifcifp;
+
+ ifc0 = NULL;
+ IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+ IF_CLONE_LOCK(ifc);
+ LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) {
+ if (ifp == ifcifp) {
+ ifc0 = ifc;
+ IF_CLONE_ADDREF_LOCKED(ifc);
+ break;
+ }
+ }
+ IF_CLONE_UNLOCK(ifc);
+ if (ifc0 != NULL)
+ break;
+ }
+ IF_CLONERS_UNLOCK();
+
+ return (ifc0);
+}
+
+/*
+ * if_clone_addgroup() decrements ifc_refcnt because it is called after
+ * if_clone_findifc().
+ */
+void
+if_clone_addgroup(struct ifnet *ifp, struct if_clone *ifc)
+{
+
+ if_addgroup(ifp, ifc->ifc_name);
+ IF_CLONE_REMREF(ifc);
+}
+
+/*
* A utility function to extract unit numbers from interface names of
* the form name###.
*
@@ -439,101 +603,67 @@
return (0);
}
-int
-ifc_alloc_unit(struct if_clone *ifc, int *unit)
+static int
+ifc_alloc_unit_specific(struct if_clone *ifc, int *unit)
{
- int wildcard, bytoff, bitoff;
- int err = 0;
+ char name[IFNAMSIZ];
- IF_CLONE_LOCK(ifc);
+ if (*unit > ifc->ifc_maxunit)
+ return (ENOSPC);
- bytoff = bitoff = 0;
- wildcard = (*unit < 0);
- /*
- * Find a free unit if none was given.
- */
- if (wildcard) {
- while ((bytoff < ifc->ifc_bmlen)
- && (ifc->ifc_units[bytoff] == 0xff))
- bytoff++;
- if (bytoff >= ifc->ifc_bmlen) {
- err = ENOSPC;
- goto done;
- }
- while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
- bitoff++;
- *unit = (bytoff << 3) + bitoff;
- }
+ if (alloc_unr_specific(ifc->ifc_unrhdr, *unit) == -1)
+ return (EEXIST);
- if (*unit > ifc->ifc_maxunit) {
- err = ENOSPC;
- goto done;
+ snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, *unit);
+ if (ifunit(name) != NULL) {
+ free_unr(ifc->ifc_unrhdr, *unit);
+ return (EEXIST);
}
- if (!wildcard) {
- bytoff = *unit >> 3;
- bitoff = *unit - (bytoff << 3);
- }
+ IF_CLONE_ADDREF(ifc);
- if((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) {
- err = EEXIST;
- goto done;
- }
- /*
- * Allocate the unit in the bitmap.
- */
- KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
- ("%s: bit is already set", __func__));
- ifc->ifc_units[bytoff] |= (1 << bitoff);
- IF_CLONE_ADDREF_LOCKED(ifc);
-
-done:
- IF_CLONE_UNLOCK(ifc);
- return (err);
+ return (0);
}
-void
-ifc_free_unit(struct if_clone *ifc, int unit)
+static int
+ifc_alloc_unit_next(struct if_clone *ifc, int *unit)
{
- int bytoff, bitoff;
+ int error;
+ *unit = alloc_unr(ifc->ifc_unrhdr);
+ if (*unit == -1)
+ return (ENOSPC);
- /*
- * Compute offset in the bitmap and deallocate the unit.
- */
- bytoff = unit >> 3;
- bitoff = unit - (bytoff << 3);
+ free_unr(ifc->ifc_unrhdr, *unit);
+ for (;;) {
+ error = ifc_alloc_unit_specific(ifc, unit);
+ if (error != EEXIST)
+ break;
- IF_CLONE_LOCK(ifc);
- KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
- ("%s: bit is already cleared", __func__));
- ifc->ifc_units[bytoff] &= ~(1 << bitoff);
- IF_CLONE_REMREF_LOCKED(ifc); /* releases lock */
+ (*unit)++;
+ }
+
+ return (error);
}
+int
+ifc_alloc_unit(struct if_clone *ifc, int *unit)
+{
+ if (*unit < 0)
+ return (ifc_alloc_unit_next(ifc, unit));
+ else
+ return (ifc_alloc_unit_specific(ifc, unit));
+}
+
void
-ifc_simple_attach(struct if_clone *ifc)
+ifc_free_unit(struct if_clone *ifc, int unit)
{
- int err;
- int unit;
- char name[IFNAMSIZ];
- struct ifc_simple_data *ifcs = ifc->ifc_data;
- KASSERT(ifcs->ifcs_minifs - 1 <= ifc->ifc_maxunit,
- ("%s: %s requested more units than allowed (%d > %d)",
- __func__, ifc->ifc_name, ifcs->ifcs_minifs,
- ifc->ifc_maxunit + 1));
-
- for (unit = 0; unit < ifcs->ifcs_minifs; unit++) {
- snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
- err = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
- KASSERT(err == 0,
- ("%s: failed to create required interface %s",
- __func__, name));
- }
+ free_unr(ifc->ifc_unrhdr, unit);
+ IF_CLONE_REMREF(ifc);
}
-int
+static int
ifc_simple_match(struct if_clone *ifc, const char *name)
{
const char *cp;
@@ -554,7 +684,7 @@
return (1);
}
-int
+static int
ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
char *dp;
@@ -561,7 +691,6 @@
int wildcard;
int unit;
int err;
- struct ifc_simple_data *ifcs = ifc->ifc_data;
err = ifc_name2unit(name, &unit);
if (err != 0)
@@ -573,7 +702,7 @@
if (err != 0)
return (err);
- err = ifcs->ifcs_create(ifc, unit, params);
+ err = ifc->ifcs_create(ifc, unit, params);
if (err != 0) {
ifc_free_unit(ifc, unit);
return (err);
@@ -597,18 +726,17 @@
return (0);
}
-int
+static int
ifc_simple_destroy(struct if_clone *ifc, struct ifnet *ifp)
{
int unit;
- struct ifc_simple_data *ifcs = ifc->ifc_data;
unit = ifp->if_dunit;
- if (unit < ifcs->ifcs_minifs)
+ if (unit < ifc->ifcs_minifs)
return (EINVAL);
- ifcs->ifcs_destroy(ifp);
+ ifc->ifcs_destroy(ifp);
ifc_free_unit(ifc, unit);
Modified: trunk/sys/net/if_clone.h
===================================================================
--- trunk/sys/net/if_clone.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_clone.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* From: @(#)if.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_clone.h 195837 2009-07-23 20:46:49Z rwatson $
+ * $FreeBSD: stable/10/sys/net/if_clone.h 285824 2015-07-23 19:57:47Z hrs $
*/
#ifndef _NET_IF_CLONE_H_
@@ -36,82 +36,44 @@
#ifdef _KERNEL
-#define IFC_CLONE_INITIALIZER(name, data, maxunit, \
- attach, match, create, destroy) \
- { { 0 }, name, maxunit, NULL, 0, data, attach, match, create, destroy }
+struct if_clone;
-/*
- * Structure describing a `cloning' interface.
- *
- * List of locks
- * (c) const until freeing
- * (d) driver specific data, may need external protection.
- * (e) locked by if_cloners_mtx
- * (i) locked by ifc_mtx mtx
- */
-struct if_clone {
- LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
- const char *ifc_name; /* (c) Name of device, e.g. `gif' */
- int ifc_maxunit; /* (c) Maximum unit number */
- unsigned char *ifc_units; /* (i) Bitmap to handle units. */
- /* Considered private, access */
- /* via ifc_(alloc|free)_unit(). */
- int ifc_bmlen; /* (c) Bitmap length. */
- void *ifc_data; /* (*) Data for ifc_* functions. */
+/* Methods. */
+typedef int ifc_match_t(struct if_clone *, const char *);
+typedef int ifc_create_t(struct if_clone *, char *, size_t, caddr_t);
+typedef int ifc_destroy_t(struct if_clone *, struct ifnet *);
- /* (c) Driver specific cloning functions. Called with no locks held. */
- void (*ifc_attach)(struct if_clone *);
- int (*ifc_match)(struct if_clone *, const char *);
- int (*ifc_create)(struct if_clone *, char *, size_t, caddr_t);
- int (*ifc_destroy)(struct if_clone *, struct ifnet *);
+typedef int ifcs_create_t(struct if_clone *, int, caddr_t);
+typedef void ifcs_destroy_t(struct ifnet *);
- long ifc_refcnt; /* (i) Refrence count. */
- struct mtx ifc_mtx; /* Muted to protect members. */
- LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
-};
-
-void if_clone_init(void);
-void if_clone_attach(struct if_clone *);
+/* Interface cloner (de)allocating functions. */
+struct if_clone *
+ if_clone_advanced(const char *, u_int, ifc_match_t, ifc_create_t,
+ ifc_destroy_t);
+struct if_clone *
+ if_clone_simple(const char *, ifcs_create_t, ifcs_destroy_t, u_int);
void if_clone_detach(struct if_clone *);
-void vnet_if_clone_init(void);
-int if_clone_create(char *, size_t, caddr_t);
-int if_clone_destroy(const char *);
-int if_clone_destroyif(struct if_clone *, struct ifnet *);
-int if_clone_list(struct if_clonereq *);
-
+/* Unit (de)allocating functions. */
int ifc_name2unit(const char *name, int *unit);
int ifc_alloc_unit(struct if_clone *, int *);
void ifc_free_unit(struct if_clone *, int);
-/*
- * The ifc_simple functions, structures, and macros implement basic
- * cloning as in 5.[012].
- */
-
-struct ifc_simple_data {
- int ifcs_minifs; /* minimum number of interfaces */
-
- int (*ifcs_create)(struct if_clone *, int, caddr_t);
- void (*ifcs_destroy)(struct ifnet *);
-};
-
-/* interface clone event */
+/* Interface clone event. */
typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);
-#define IFC_SIMPLE_DECLARE(name, minifs) \
-struct ifc_simple_data name##_cloner_data = \
- {minifs, name##_clone_create, name##_clone_destroy}; \
-struct if_clone name##_cloner = \
- IFC_CLONE_INITIALIZER(#name, &name##_cloner_data, IF_MAXUNIT, \
- ifc_simple_attach, ifc_simple_match, ifc_simple_create, ifc_simple_destroy)
+/* The below interfaces used only by net/if.c. */
+void if_clone_init(void);
+void vnet_if_clone_init(void);
+int if_clone_create(char *, size_t, caddr_t);
+int if_clone_destroy(const char *);
+int if_clone_list(struct if_clonereq *);
+struct if_clone *if_clone_findifc(struct ifnet *);
+void if_clone_addgroup(struct ifnet *, struct if_clone *);
-void ifc_simple_attach(struct if_clone *);
-int ifc_simple_match(struct if_clone *, const char *);
-int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
-int ifc_simple_destroy(struct if_clone *, struct ifnet *);
+/* The below interface used only by epair(4). */
+int if_clone_destroyif(struct if_clone *, struct ifnet *);
#endif /* _KERNEL */
-
#endif /* !_NET_IF_CLONE_H_ */
Modified: trunk/sys/net/if_dead.c
===================================================================
--- trunk/sys/net/if_dead.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_dead.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_dead.c 199975 2009-11-30 21:25:57Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_dead.c 249925 2013-04-26 12:50:32Z glebius $");
#include <sys/param.h>
#include <sys/mbuf.h>
@@ -43,7 +43,7 @@
#include <net/if_var.h>
static int
-ifdead_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ifdead_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
struct route *ro)
{
Modified: trunk/sys/net/if_debug.c
===================================================================
--- trunk/sys/net/if_debug.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_debug.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_debug.c 223735 2011-07-03 12:22:02Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_debug.c 223735 2011-07-03 12:22:02Z bz $");
#include "opt_ddb.h"
Modified: trunk/sys/net/if_disc.c
===================================================================
--- trunk/sys/net/if_disc.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_disc.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* From: @(#)if_loop.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_disc.c 191148 2009-04-16 20:30:28Z kmacy $
+ * $FreeBSD: stable/10/sys/net/if_disc.c 263478 2014-03-21 15:15:30Z glebius $
*/
/*
@@ -60,22 +60,21 @@
#define DSMTU 65532
#endif
-#define DISCNAME "disc"
-
struct disc_softc {
struct ifnet *sc_ifp;
};
static int discoutput(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+ const struct sockaddr *, struct route *);
static void discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int discioctl(struct ifnet *, u_long, caddr_t);
static int disc_clone_create(struct if_clone *, int, caddr_t);
static void disc_clone_destroy(struct ifnet *);
-static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface");
+static const char discname[] = "disc";
+static MALLOC_DEFINE(M_DISC, discname, "Discard interface");
-IFC_SIMPLE_DECLARE(disc, 0);
+static struct if_clone *disc_cloner;
static int
disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
@@ -91,7 +90,7 @@
}
ifp->if_softc = sc;
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, discname, unit);
ifp->if_mtu = DSMTU;
/*
* IFF_LOOPBACK should not be removed from disc's flags because
@@ -136,10 +135,11 @@
switch (type) {
case MOD_LOAD:
- if_clone_attach(&disc_cloner);
+ disc_cloner = if_clone_simple(discname, disc_clone_create,
+ disc_clone_destroy, 0);
break;
case MOD_UNLOAD:
- if_clone_detach(&disc_cloner);
+ if_clone_detach(disc_cloner);
break;
default:
return (EOPNOTSUPP);
@@ -156,7 +156,7 @@
DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
static int
-discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
u_int32_t af;
@@ -164,15 +164,14 @@
M_ASSERTPKTHDR(m);
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
- if (bpf_peers_present(ifp->if_bpf)) {
- u_int af = dst->sa_family;
+ if (bpf_peers_present(ifp->if_bpf))
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
+
m->m_pkthdr.rcvif = ifp;
ifp->if_opackets++;
@@ -187,7 +186,7 @@
discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = DSMTU;
+ rt->rt_mtu = DSMTU;
}
/*
Modified: trunk/sys/net/if_dl.h
===================================================================
--- trunk/sys/net/if_dl.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_dl.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if_dl.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_dl.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/if_dl.h 235640 2012-05-19 02:39:43Z marcel $
*/
#ifndef _NET_IF_DL_H_
@@ -68,6 +68,7 @@
};
#define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+#define LLINDEX(s) ((s)->sdl_index)
#ifndef _KERNEL
Modified: trunk/sys/net/if_edsc.c
===================================================================
--- trunk/sys/net/if_edsc.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_edsc.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* From: @(#)if_loop.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_edsc.c 167896 2007-03-26 09:05:10Z yar $
+ * $FreeBSD: stable/10/sys/net/if_edsc.c 241610 2012-10-16 13:37:54Z glebius $
*/
/*
@@ -52,6 +52,8 @@
#include <net/if_types.h> /* IFT_ETHER and friends */
#include <net/if_var.h> /* kernel-only part of ifnet(9) */
+static const char edscname[] = "edsc";
+
/*
* Software configuration of an interface specific to this device type.
*/
@@ -65,9 +67,9 @@
};
/*
- * Simple cloning methods.
- * IFC_SIMPLE_DECLARE() expects precisely these names.
+ * Attach to the interface cloning framework.
*/
+static struct if_clone *edsc_cloner;
static int edsc_clone_create(struct if_clone *, int, caddr_t);
static void edsc_clone_destroy(struct ifnet *);
@@ -82,17 +84,9 @@
/*
* We'll allocate softc instances from this.
*/
-static MALLOC_DEFINE(M_EDSC, "edsc", "Ethernet discard interface");
+static MALLOC_DEFINE(M_EDSC, edscname, "Ethernet discard interface");
/*
- * Attach to the interface cloning framework under the name of "edsc".
- * The second argument is the number of units to be created from
- * the outset. It's also the minimum number of units allowed.
- * We don't want any units created as soon as the driver is loaded.
- */
-IFC_SIMPLE_DECLARE(edsc, 0);
-
-/*
* Create an interface instance.
*/
static int
@@ -117,7 +111,7 @@
/*
* Get a name for this particular interface in its ifnet structure.
*/
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, edscname, unit);
/*
* Typical Ethernet interface flags: we can do broadcast and
@@ -324,8 +318,13 @@
case MOD_LOAD:
/*
* Connect to the network interface cloning framework.
+ * The last argument is the number of units to be created
+ * from the outset. It's also the minimum number of units
+ * allowed. We don't want any units created as soon as the
+ * driver is loaded.
*/
- if_clone_attach(&edsc_cloner);
+ edsc_cloner = if_clone_simple(edscname, edsc_clone_create,
+ edsc_clone_destroy, 0);
break;
case MOD_UNLOAD:
@@ -333,7 +332,7 @@
* Disconnect from the cloning framework.
* Existing interfaces will be disposed of properly.
*/
- if_clone_detach(&edsc_cloner);
+ if_clone_detach(edsc_cloner);
break;
default:
Modified: trunk/sys/net/if_ef.c
===================================================================
--- trunk/sys/net/if_ef.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_ef.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_ef.c 207554 2010-05-03 07:32:50Z sobomax $
+ * $FreeBSD: stable/10/sys/net/if_ef.c 249925 2013-04-26 12:50:32Z glebius $
*/
#include "opt_inet.h"
@@ -103,7 +103,7 @@
extern int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
extern int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
+ const struct sockaddr *dst, short *tp, int *hlen);
/*
static void ef_reset (struct ifnet *);
@@ -115,7 +115,7 @@
static void ef_start(struct ifnet *);
static int ef_input(struct ifnet*, struct ether_header *, struct mbuf *);
static int ef_output(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
+ const struct sockaddr *dst, short *tp, int *hlen);
static int ef_load(void);
static int ef_unload(void);
@@ -152,14 +152,10 @@
ef_detach(struct efnet *sc)
{
struct ifnet *ifp = sc->ef_ifp;
- int s;
- s = splimp();
-
ether_ifdetach(ifp);
if_free(ifp);
- splx(s);
return 0;
}
@@ -173,11 +169,10 @@
{
struct efnet *sc = ifp->if_softc;
struct ifaddr *ifa = (struct ifaddr*)data;
- int s, error;
+ int error;
EFDEBUG("IOCTL %ld for %s\n", cmd, ifp->if_xname);
error = 0;
- s = splimp();
switch (cmd) {
case SIOCSIFFLAGS:
error = 0;
@@ -194,7 +189,6 @@
error = ether_ioctl(ifp, cmd, data);
break;
}
- splx(s);
return error;
}
@@ -393,8 +387,8 @@
}
static int
-ef_output(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, short *tp,
- int *hlen)
+ef_output(struct ifnet *ifp, struct mbuf **mp, const struct sockaddr *dst,
+ short *tp, int *hlen)
{
struct efnet *sc = (struct efnet*)ifp->if_softc;
struct mbuf *m = *mp;
@@ -415,7 +409,7 @@
type = htons(m->m_pkthdr.len);
break;
case ETHER_FT_8022:
- M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAIT);
+ M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAITOK);
/*
* Ensure that ethernet header and next three bytes
* will fit into single mbuf
@@ -434,7 +428,7 @@
*hlen += 3;
break;
case ETHER_FT_SNAP:
- M_PREPEND(m, 8, M_WAIT);
+ M_PREPEND(m, 8, M_WAITOK);
type = htons(m->m_pkthdr.len);
cp = mtod(m, u_char *);
bcopy("\xAA\xAA\x03\x00\x00\x00\x81\x37", cp, 8);
Modified: trunk/sys/net/if_enc.c
===================================================================
--- trunk/sys/net/if_enc.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_enc.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_enc.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_enc.c 255926 2013-09-28 14:14:23Z glebius $
*/
#include "opt_inet.h"
@@ -89,12 +89,12 @@
static int enc_ioctl(struct ifnet *, u_long, caddr_t);
static int enc_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+ const struct sockaddr *dst, struct route *ro);
static int enc_clone_create(struct if_clone *, int, caddr_t);
static void enc_clone_destroy(struct ifnet *);
+static struct if_clone *enc_cloner;
+static const char encname[] = "enc";
-IFC_SIMPLE_DECLARE(enc, 1);
-
/*
* Sysctls.
*/
@@ -144,7 +144,7 @@
return (ENOSPC);
}
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, encname, unit);
ifp->if_mtu = ENCMTU;
ifp->if_ioctl = enc_ioctl;
ifp->if_output = enc_output;
@@ -168,7 +168,8 @@
switch (type) {
case MOD_LOAD:
mtx_init(&enc_mtx, "enc mtx", NULL, MTX_DEF);
- if_clone_attach(&enc_cloner);
+ enc_cloner = if_clone_simple(encname, enc_clone_create,
+ enc_clone_destroy, 1);
break;
case MOD_UNLOAD:
printf("enc module unload - not possible for this module\n");
@@ -188,7 +189,7 @@
DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
static int
-enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+enc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
m_freem(m);
@@ -271,23 +272,8 @@
switch (ip->ip_v) {
#ifdef INET
case 4:
- /*
- * before calling the firewall, swap fields the same as
- * IP does. here we assume the header is contiguous
- */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
error = pfil_run_hooks(&V_inet_pfil_hook, mp,
encif, dir, NULL);
-
- if (*mp == NULL || error != 0)
- break;
-
- /* restore byte ordering */
- ip = mtod(*mp, struct ip *);
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
break;
#endif
#ifdef INET6
Modified: trunk/sys/net/if_enc.h
===================================================================
--- trunk/sys/net/if_enc.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_enc.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_enc.h 181627 2008-08-12 09:05:01Z vanhu $
+ * $FreeBSD: stable/10/sys/net/if_enc.h 181627 2008-08-12 09:05:01Z vanhu $
*/
#ifndef _NET_IF_ENC_H
Modified: trunk/sys/net/if_epair.c
===================================================================
--- trunk/sys/net/if_epair.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_epair.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -49,7 +49,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_epair.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_epair.c 287594 2015-09-09 08:52:39Z hrs $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -73,8 +73,6 @@
#include <net/netisr.h>
#include <net/vnet.h>
-#define EPAIRNAME "epair"
-
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
@@ -101,9 +99,11 @@
static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int epair_clone_destroy(struct if_clone *, struct ifnet *);
-/* Netisr realted definitions and sysctl. */
+static const char epairname[] = "epair";
+
+/* Netisr related definitions and sysctl. */
static struct netisr_handler epair_nh = {
- .nh_name = EPAIRNAME,
+ .nh_name = epairname,
.nh_proto = NETISR_EPAIR,
.nh_policy = NETISR_POLICY_CPU,
.nh_handler = epair_nh_sintr,
@@ -169,12 +169,11 @@
#define EPAIR_REFCOUNT_ASSERT(a, p)
#endif
-static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
+static MALLOC_DEFINE(M_EPAIR, epairname,
"Pair of virtual cross-over connected Ethernet-like interfaces");
-static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
- EPAIRNAME, NULL, IF_MAXUNIT,
- NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
+static VNET_DEFINE(struct if_clone *, epair_cloner);
+#define V_epair_cloner VNET(epair_cloner)
/*
* DPCPU area and functions.
@@ -693,10 +692,10 @@
* - epair<n>
* but not the epair<n>[ab] versions.
*/
- if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
+ if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
return (0);
- for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
+ for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
if (*cp < '0' || *cp > '9')
return (0);
}
@@ -715,7 +714,7 @@
/*
* We are abusing params to create our second interface.
- * Actually we already created it and called if_clone_createif()
+ * Actually we already created it and called if_clone_create()
* for it to do the official insertion procedure the moment we knew
* it cannot fail anymore. So just do attach it here.
*/
@@ -762,10 +761,17 @@
ifc_free_unit(ifc, unit);
return (ENOSPC);
}
- *dp = 'a';
+ *dp = 'b';
/* Must not change dp so we can replace 'a' by 'b' later. */
*(dp+1) = '\0';
+ /* Check if 'a' and 'b' interfaces already exist. */
+ if (ifunit(name) != NULL)
+ return (EEXIST);
+ *dp = 'a';
+ if (ifunit(name) != NULL)
+ return (EEXIST);
+
/* Allocate memory for both [ab] interfaces */
sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
@@ -803,12 +809,20 @@
netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
scb->cpuid =
netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
+
+ /* Initialise pseudo media types. */
+ ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
+ ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
/* Finish initialization of interface <n>a. */
ifp = sca->ifp;
ifp->if_softc = sca;
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -826,7 +840,7 @@
sca->if_qflush = ifp->if_qflush;
ifp->if_qflush = epair_qflush;
ifp->if_transmit = epair_transmit;
- ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+ if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */
/* Swap the name and finish initialization of interface <n>b. */
*dp = 'b';
@@ -834,7 +848,7 @@
ifp = scb->ifp;
ifp->if_softc = scb;
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -844,15 +858,15 @@
ifp->if_init = epair_init;
ifp->if_snd.ifq_maxlen = ifqmaxlen;
/* We need to play some tricks here for the second interface. */
- strlcpy(name, EPAIRNAME, len);
+ strlcpy(name, epairname, len);
error = if_clone_create(name, len, (caddr_t)scb);
if (error)
- panic("%s: if_clone_createif() for our 2nd iface failed: %d",
+ panic("%s: if_clone_create() for our 2nd iface failed: %d",
__func__, error);
scb->if_qflush = ifp->if_qflush;
ifp->if_qflush = epair_qflush;
ifp->if_transmit = epair_transmit;
- ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+ if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */
/*
* Restore name to <n>a as the ifp for this will go into the
@@ -861,14 +875,6 @@
strlcpy(name, sca->ifp->if_xname, len);
DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
- /* Initialise pseudo media types. */
- ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
- ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
- ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
- ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
- ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
- ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
-
/* Tell the world, that we are ready to rock. */
sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -946,6 +952,25 @@
return (0);
}
+static void
+vnet_epair_init(const void *unused __unused)
+{
+
+ V_epair_cloner = if_clone_advanced(epairname, 0,
+ epair_clone_match, epair_clone_create, epair_clone_destroy);
+}
+VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_epair_init, NULL);
+
+static void
+vnet_epair_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_epair_cloner);
+}
+VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_epair_uninit, NULL);
+
static int
epair_modevent(module_t mod, int type, void *data)
{
@@ -959,16 +984,14 @@
if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit))
epair_nh.nh_qlimit = qlimit;
netisr_register(&epair_nh);
- if_clone_attach(&epair_cloner);
if (bootverbose)
- printf("%s initialized.\n", EPAIRNAME);
+ printf("%s initialized.\n", epairname);
break;
case MOD_UNLOAD:
- if_clone_detach(&epair_cloner);
netisr_unregister(&epair_nh);
epair_dpcpu_detach();
if (bootverbose)
- printf("%s unloaded.\n", EPAIRNAME);
+ printf("%s unloaded.\n", epairname);
break;
default:
return (EOPNOTSUPP);
Modified: trunk/sys/net/if_ethersubr.c
===================================================================
--- trunk/sys/net/if_ethersubr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_ethersubr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_ethersubr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_ethersubr.c 332160 2018-04-07 00:04:28Z brooks $
*/
#include "opt_atalk.h"
@@ -40,6 +40,8 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -46,10 +48,10 @@
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/random.h>
-#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
+#include <sys/uuid.h>
#include <net/if.h>
#include <net/if_arp.h>
@@ -63,9 +65,11 @@
#include <net/if_bridgevar.h>
#include <net/if_vlan_var.h>
#include <net/if_llatbl.h>
-#include <net/pf_mtag.h>
+#include <net/pfil.h>
#include <net/vnet.h>
+#include <netpfil/pf/pf_mtag.h>
+
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
@@ -72,8 +76,6 @@
#include <netinet/if_ether.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
#endif
#ifdef INET6
#include <netinet6/nd6.h>
@@ -86,7 +88,7 @@
int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
+ const struct sockaddr *dst, short *tp, int *hlen);
#ifdef NETATALK
#include <netatalk/at.h>
@@ -107,6 +109,8 @@
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
#endif
+VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */
+
/* netgraph node hooks for ng_ether(4) */
void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
@@ -142,13 +146,21 @@
#define senderr(e) do { error = (e); goto bad;} while (0)
-#if defined(INET) || defined(INET6)
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared);
-static VNET_DEFINE(int, ether_ipfw);
-#define V_ether_ipfw VNET(ether_ipfw)
-#endif
+static void
+update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
+{
+ int csum_flags = 0;
+ if (src->m_pkthdr.csum_flags & CSUM_IP)
+ csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
+ if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+ csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+ if (src->m_pkthdr.csum_flags & CSUM_SCTP)
+ csum_flags |= CSUM_SCTP_VALID;
+ dst->m_pkthdr.csum_flags |= csum_flags;
+ if (csum_flags & CSUM_DATA_VALID)
+ dst->m_pkthdr.csum_data = 0xffff;
+}
/*
* Ethernet output routine.
@@ -158,7 +170,7 @@
*/
int
ether_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+ const struct sockaddr *dst, struct route *ro)
{
short type;
int error = 0, hdrcmplt = 0;
@@ -247,8 +259,8 @@
goto bad;
} else
type = htons(ETHERTYPE_IPX);
- bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
- (caddr_t)edst, sizeof (edst));
+ bcopy(&((const struct sockaddr_ipx *)dst)->sipx_addr.x_host,
+ edst, sizeof (edst));
break;
#endif
#ifdef NETATALK
@@ -256,9 +268,9 @@
{
struct at_ifaddr *aa;
- if ((aa = at_ifawithnet((struct sockaddr_at *)dst)) == NULL)
+ if ((aa = at_ifawithnet((const struct sockaddr_at *)dst)) == NULL)
senderr(EHOSTUNREACH); /* XXX */
- if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) {
+ if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) {
ifa_free(&aa->aa_ifa);
return (0);
}
@@ -269,7 +281,7 @@
struct llc llc;
ifa_free(&aa->aa_ifa);
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
if (m == NULL)
senderr(ENOBUFS);
llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
@@ -288,18 +300,21 @@
#endif /* NETATALK */
case pseudo_AF_HDRCMPLT:
+ {
+ const struct ether_header *eh;
+
hdrcmplt = 1;
- eh = (struct ether_header *)dst->sa_data;
+ eh = (const struct ether_header *)dst->sa_data;
(void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
/* FALLTHROUGH */
case AF_UNSPEC:
loop_copy = 0; /* if this is for us, don't do it */
- eh = (struct ether_header *)dst->sa_data;
+ eh = (const struct ether_header *)dst->sa_data;
(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
type = eh->ether_type;
break;
-
+ }
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
senderr(EAFNOSUPPORT);
@@ -306,15 +321,7 @@
}
if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
- int csum_flags = 0;
- if (m->m_pkthdr.csum_flags & CSUM_IP)
- csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- csum_flags |= CSUM_SCTP_VALID;
- m->m_pkthdr.csum_flags |= csum_flags;
- m->m_pkthdr.csum_data = 0xffff;
+ update_mbuf_csumflags(m, m);
return (if_simloop(ifp, m, dst->sa_family, 0));
}
@@ -322,7 +329,7 @@
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
if (m == NULL)
senderr(ENOBUFS);
eh = mtod(m, struct ether_header *);
@@ -347,15 +354,6 @@
*/
if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
((t = pf_find_mtag(m)) == NULL || !t->routed)) {
- int csum_flags = 0;
-
- if (m->m_pkthdr.csum_flags & CSUM_IP)
- csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- csum_flags |= CSUM_SCTP_VALID;
-
if (m->m_flags & M_BCAST) {
struct mbuf *n;
@@ -371,18 +369,14 @@
* often used kernel parts suffer from the same bug.
* See PR kern/105943 for a proposed general solution.
*/
- if ((n = m_dup(m, M_DONTWAIT)) != NULL) {
- n->m_pkthdr.csum_flags |= csum_flags;
- if (csum_flags & CSUM_DATA_VALID)
- n->m_pkthdr.csum_data = 0xffff;
+ if ((n = m_dup(m, M_NOWAIT)) != NULL) {
+ update_mbuf_csumflags(m, n);
(void)if_simloop(ifp, n, dst->sa_family, hlen);
} else
ifp->if_iqdrops++;
} else if (bcmp(eh->ether_dhost, eh->ether_shost,
ETHER_ADDR_LEN) == 0) {
- m->m_pkthdr.csum_flags |= csum_flags;
- if (csum_flags & CSUM_DATA_VALID)
- m->m_pkthdr.csum_data = 0xffff;
+ update_mbuf_csumflags(m, m);
(void) if_simloop(ifp, m, dst->sa_family, hlen);
return (0); /* XXX */
}
@@ -398,7 +392,7 @@
#if defined(INET) || defined(INET6)
if (ifp->if_carp &&
- (error = (*carp_output_p)(ifp, m, dst, NULL)))
+ (error = (*carp_output_p)(ifp, m, dst)))
goto bad;
#endif
@@ -428,18 +422,17 @@
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
-#if defined(INET) || defined(INET6)
+ int i;
- if (V_ip_fw_chk_ptr && V_ether_ipfw != 0) {
- if (ether_ipfw_chk(&m, ifp, 0) == 0) {
- if (m) {
- m_freem(m);
- return EACCES; /* pkt dropped */
- } else
- return 0; /* consumed e.g. in a pipe */
- }
+ if (PFIL_HOOKED(&V_link_pfil_hook)) {
+ i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL);
+
+ if (i != 0)
+ return (EACCES);
+
+ if (m == NULL)
+ return (0);
}
-#endif
/*
* Queue message on interface, update output statistics if
@@ -449,113 +442,6 @@
}
#if defined(INET) || defined(INET6)
-/*
- * ipfw processing for ethernet packets (in and out).
- * The second parameter is NULL from ether_demux, and ifp from
- * ether_output_frame.
- */
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
-{
- struct ether_header *eh;
- struct ether_header save_eh;
- struct mbuf *m;
- int i;
- struct ip_fw_args args;
- struct m_tag *mtag;
-
- /* fetch start point from rule, if any */
- mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
- if (mtag == NULL) {
- args.rule.slot = 0;
- } else {
- /* dummynet packet, already partially processed */
- struct ipfw_rule_ref *r;
-
- /* XXX can we free it after use ? */
- mtag->m_tag_id = PACKET_TAG_NONE;
- r = (struct ipfw_rule_ref *)(mtag + 1);
- if (r->info & IPFW_ONEPASS)
- return (1);
- args.rule = *r;
- }
-
- /*
- * I need some amt of data to be contiguous, and in case others need
- * the packet (shared==1) also better be in the first mbuf.
- */
- m = *m0;
- i = min( m->m_pkthdr.len, max_protohdr);
- if ( shared || m->m_len < i) {
- m = m_pullup(m, i);
- if (m == NULL) {
- *m0 = m;
- return 0;
- }
- }
- eh = mtod(m, struct ether_header *);
- save_eh = *eh; /* save copy for restore below */
- m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
-
- args.m = m; /* the packet we are looking at */
- args.oif = dst; /* destination, if any */
- args.next_hop = NULL; /* we do not support forward yet */
- args.next_hop6 = NULL; /* we do not support forward yet */
- args.eh = &save_eh; /* MAC header for bridged/MAC packets */
- args.inp = NULL; /* used by ipfw uid/gid/jail rules */
- i = V_ip_fw_chk_ptr(&args);
- m = args.m;
- if (m != NULL) {
- /*
- * Restore Ethernet header, as needed, in case the
- * mbuf chain was replaced by ipfw.
- */
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
- if (m == NULL) {
- *m0 = m;
- return 0;
- }
- if (eh != mtod(m, struct ether_header *))
- bcopy(&save_eh, mtod(m, struct ether_header *),
- ETHER_HDR_LEN);
- }
- *m0 = m;
-
- if (i == IP_FW_DENY) /* drop */
- return 0;
-
- KASSERT(m != NULL, ("ether_ipfw_chk: m is NULL"));
-
- if (i == IP_FW_PASS) /* a PASS rule. */
- return 1;
-
- if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
- int dir;
- /*
- * Pass the pkt to dummynet, which consumes it.
- * If shared, make a copy and keep the original.
- */
- if (shared) {
- m = m_copypacket(m, M_DONTWAIT);
- if (m == NULL)
- return 0;
- } else {
- /*
- * Pass the original to dummynet and
- * nothing back to the caller
- */
- *m0 = NULL ;
- }
- dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
- ip_dn_io_ptr(&m, dir, &args);
- return 0;
- }
- /*
- * XXX at some point add support for divert/forward actions.
- * If none of the above matches, we have to drop the pkt.
- */
- return 0;
-}
#endif
/*
@@ -646,7 +532,8 @@
m->m_flags &= ~M_HASFCS;
}
- ifp->if_ibytes += m->m_pkthdr.len;
+ if (!(ifp->if_capenable & IFCAP_HWSTATS))
+ ifp->if_ibytes += m->m_pkthdr.len;
/* Allow monitor mode to claim this frame, after stats are updated. */
if (ifp->if_flags & IFF_MONITOR) {
@@ -695,6 +582,7 @@
bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
ETHER_HDR_LEN - ETHER_TYPE_LEN);
m_adj(m, ETHER_VLAN_ENCAP_LEN);
+ eh = mtod(m, struct ether_header *);
}
M_SETFIB(m, ifp->if_fib);
@@ -709,6 +597,7 @@
CURVNET_RESTORE();
return;
}
+ eh = mtod(m, struct ether_header *);
}
/*
@@ -723,6 +612,7 @@
CURVNET_RESTORE();
return;
}
+ eh = mtod(m, struct ether_header *);
}
#if defined(INET) || defined(INET6)
@@ -753,9 +643,8 @@
m->m_flags |= M_PROMISC;
}
- /* First chunk of an mbuf contains good entropy */
if (harvest.ethernet)
- random_harvest(m, 16, 3, 0, RANDOM_NET);
+ random_harvest(&(m->m_data), 12, 2, RANDOM_NET_ETHER);
ether_demux(ifp, m);
CURVNET_RESTORE();
@@ -789,6 +678,35 @@
SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
static void
+vnet_ether_init(__unused void *arg)
+{
+ int i;
+
+ /* Initialize packet filter hooks. */
+ V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
+ V_link_pfil_hook.ph_af = AF_LINK;
+ if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to register pfil link hook, "
+ "error %d\n", __func__, i);
+}
+VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_ether_init, NULL);
+
+static void
+vnet_ether_destroy(__unused void *arg)
+{
+ int i;
+
+ if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil link hook, "
+ "error %d\n", __func__, i);
+}
+VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_ether_destroy, NULL);
+
+
+
+static void
ether_input(struct ifnet *ifp, struct mbuf *m)
{
@@ -808,7 +726,7 @@
ether_demux(struct ifnet *ifp, struct mbuf *m)
{
struct ether_header *eh;
- int isr;
+ int i, isr;
u_short ether_type;
#if defined(NETATALK)
struct llc *l;
@@ -816,19 +734,14 @@
KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
-#if defined(INET) || defined(INET6)
- /*
- * Allow dummynet and/or ipfw to claim the frame.
- * Do not do this for PROMISC frames in case we are re-entered.
- */
- if (V_ip_fw_chk_ptr && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) {
- if (ether_ipfw_chk(&m, NULL, 0) == 0) {
- if (m)
- m_freem(m); /* dropped; free mbuf chain */
- return; /* consumed */
- }
+ /* Do not grab PROMISC frames in case we are re-entered. */
+ if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
+ i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL);
+
+ if (i != 0 || m == NULL)
+ return;
}
-#endif
+
eh = mtod(m, struct ether_header *);
ether_type = ntohs(eh->ether_type);
@@ -865,7 +778,7 @@
* Strip off Ethernet header.
*/
m->m_flags &= ~M_VLANTAG;
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m_adj(m, ETHER_HDR_LEN);
/*
@@ -954,7 +867,7 @@
* Put back the ethernet header so netgraph has a
* consistent view of inbound packets.
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
(*ng_ether_input_orphan_p)(ifp, m);
return;
}
@@ -1008,6 +921,9 @@
sdl->sdl_alen = ifp->if_addrlen;
bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
+ if (ifp->if_hw_addr != NULL)
+ bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen);
+
bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
if (ng_ether_attach_p != NULL)
(*ng_ether_attach_p)(ifp);
@@ -1018,6 +934,13 @@
break;
if (i != ifp->if_addrlen)
if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
+
+ uuid_ether_add(LLADDR(sdl));
+
+ /* Add necessary bits are setup; announce it now. */
+ EVENTHANDLER_INVOKE(ether_ifattach_event, ifp);
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL);
}
/*
@@ -1026,6 +949,11 @@
void
ether_ifdetach(struct ifnet *ifp)
{
+ struct sockaddr_dl *sdl;
+
+ sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
+ uuid_ether_del(LLADDR(sdl));
+
if (IFP2AC(ifp)->ac_netgraph != NULL) {
KASSERT(ng_ether_detach_p != NULL,
("ng_ether_detach_p is NULL"));
@@ -1057,10 +985,6 @@
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
-#if defined(INET) || defined(INET6)
-SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
- &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");
-#endif
#if 0
/*
@@ -1186,13 +1110,8 @@
break;
case SIOCGIFADDR:
- {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *) & ifr->ifr_data;
- bcopy(IF_LLADDR(ifp),
- (caddr_t) sa->sa_data, ETHER_ADDR_LEN);
- }
+ bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+ ETHER_ADDR_LEN);
break;
case SIOCSIFMTU:
@@ -1382,7 +1301,7 @@
{
struct ether_vlan_header *evl;
- M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
+ M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
if (m == NULL)
return (NULL);
/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
Modified: trunk/sys/net/if_faith.c
===================================================================
--- trunk/sys/net/if_faith.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_faith.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_faith.c 232292 2012-02-29 09:47:26Z bz $
+ * $FreeBSD: stable/10/sys/net/if_faith.c 263478 2014-03-21 15:15:30Z glebius $
*/
/*
* derived from
@@ -80,14 +80,12 @@
#include <netinet6/ip6_var.h>
#endif
-#define FAITHNAME "faith"
-
struct faith_softc {
struct ifnet *sc_ifp;
};
static int faithioctl(struct ifnet *, u_long, caddr_t);
-int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int faithoutput(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *);
#ifdef INET6
@@ -96,13 +94,13 @@
static int faithmodevent(module_t, int, void *);
-static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface");
+static const char faithname[] = "faith";
+static MALLOC_DEFINE(M_FAITH, faithname, "Firewall Assisted Tunnel Interface");
static int faith_clone_create(struct if_clone *, int, caddr_t);
static void faith_clone_destroy(struct ifnet *);
+static struct if_clone *faith_cloner;
-IFC_SIMPLE_DECLARE(faith, 0);
-
#define FAITHMTU 1500
static int
@@ -114,8 +112,8 @@
switch (type) {
case MOD_LOAD:
- if_clone_attach(&faith_cloner);
-
+ faith_cloner = if_clone_simple(faithname, faith_clone_create,
+ faith_clone_destroy, 0);
#ifdef INET6
faithprefix_p = faithprefix;
#endif
@@ -126,7 +124,7 @@
faithprefix_p = NULL;
#endif
- if_clone_detach(&faith_cloner);
+ if_clone_detach(faith_cloner);
break;
default:
return EOPNOTSUPP;
@@ -160,7 +158,7 @@
}
ifp->if_softc = sc;
- if_initname(sc->sc_ifp, ifc->ifc_name, unit);
+ if_initname(sc->sc_ifp, faithname, unit);
ifp->if_mtu = FAITHMTU;
/* Change to BROADCAST experimentaly to announce its prefix. */
@@ -187,12 +185,9 @@
free(sc, M_FAITH);
}
-int
-faithoutput(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+static int
+faithoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
int isr;
u_int32_t af;
@@ -203,15 +198,13 @@
if (ro != NULL)
rt = ro->ro_rt;
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
- if (bpf_peers_present(ifp->if_bpf)) {
- af = dst->sa_family;
+ if (bpf_peers_present(ifp->if_bpf))
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
m_freem(m);
@@ -220,7 +213,7 @@
}
ifp->if_opackets++;
ifp->if_obytes += m->m_pkthdr.len;
- switch (dst->sa_family) {
+ switch (af) {
#ifdef INET
case AF_INET:
isr = NETISR_IP;
@@ -253,7 +246,7 @@
struct rt_addrinfo *info;
{
RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+ rt->rt_mtu = rt->rt_ifp->if_mtu;
}
/*
Modified: trunk/sys/net/if_fddisubr.c
===================================================================
--- trunk/sys/net/if_fddisubr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_fddisubr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp
- * $FreeBSD: stable/9/sys/net/if_fddisubr.c 223741 2011-07-03 16:08:38Z bz $
+ * $FreeBSD: stable/10/sys/net/if_fddisubr.c 332160 2018-04-07 00:04:28Z brooks $
*/
#include "opt_atalk.h"
@@ -97,7 +97,7 @@
static int fddi_resolvemulti(struct ifnet *, struct sockaddr **,
struct sockaddr *);
-static int fddi_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static void fddi_input(struct ifnet *ifp, struct mbuf *m);
@@ -111,11 +111,8 @@
* Assumes that ifp is actually pointer to arpcom structure.
*/
static int
-fddi_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
u_int16_t type;
int loop_copy = 0, error = 0, hdrcmplt = 0;
@@ -190,19 +187,19 @@
#ifdef IPX
case AF_IPX:
type = htons(ETHERTYPE_IPX);
- bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
- (caddr_t)edst, FDDI_ADDR_LEN);
+ bcopy(&((const struct sockaddr_ipx *)dst)->sipx_addr.x_host,
+ edst, FDDI_ADDR_LEN);
break;
#endif /* IPX */
#ifdef NETATALK
case AF_APPLETALK: {
struct at_ifaddr *aa;
- if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst))
+ if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst))
return (0);
/*
* ifaddr is the first thing in at_ifaddr
*/
- if ((aa = at_ifawithnet( (struct sockaddr_at *)dst)) == 0)
+ if ((aa = at_ifawithnet((const struct sockaddr_at *)dst)) == 0)
goto bad;
/*
@@ -213,7 +210,7 @@
if (aa->aa_flags & AFA_PHASE2) {
struct llc llc;
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAIT);
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAITOK);
llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
llc.llc_control = LLC_UI;
bcopy(at_org_code, llc.llc_snap.org_code, sizeof(at_org_code));
@@ -230,19 +227,21 @@
case pseudo_AF_HDRCMPLT:
{
- struct ether_header *eh;
+ const struct ether_header *eh;
+
hdrcmplt = 1;
- eh = (struct ether_header *)dst->sa_data;
- bcopy((caddr_t)eh->ether_shost, (caddr_t)esrc, FDDI_ADDR_LEN);
+ eh = (const struct ether_header *)dst->sa_data;
+ bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN);
/* FALLTHROUGH */
}
case AF_UNSPEC:
{
- struct ether_header *eh;
+ const struct ether_header *eh;
+
loop_copy = -1;
- eh = (struct ether_header *)dst->sa_data;
- bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, FDDI_ADDR_LEN);
+ eh = (const struct ether_header *)dst->sa_data;
+ bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN);
if (*edst & 1)
m->m_flags |= (M_BCAST|M_MCAST);
type = eh->ether_type;
@@ -292,7 +291,7 @@
*/
if (type != 0) {
struct llc *l;
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
if (m == 0)
senderr(ENOBUFS);
l = mtod(m, struct llc *);
@@ -308,7 +307,7 @@
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, FDDI_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT);
if (m == 0)
senderr(ENOBUFS);
fh = mtod(m, struct fddi_header *);
@@ -392,7 +391,6 @@
goto dropanyway;
}
fh = mtod(m, struct fddi_header *);
- m->m_pkthdr.header = (void *)fh;
/*
* Discard packet if interface is not up.
@@ -672,14 +670,9 @@
break;
}
break;
- case SIOCGIFADDR: {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *) & ifr->ifr_data;
- bcopy(IF_LLADDR(ifp),
- (caddr_t) sa->sa_data, FDDI_ADDR_LEN);
-
- }
+ case SIOCGIFADDR:
+ bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+ FDDI_ADDR_LEN);
break;
case SIOCSIFMTU:
/*
Modified: trunk/sys/net/if_fwsubr.c
===================================================================
--- trunk/sys/net/if_fwsubr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_fwsubr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_fwsubr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_fwsubr.c 332160 2018-04-07 00:04:28Z brooks $
*/
#include "opt_inet.h"
@@ -76,7 +76,7 @@
};
static int
-firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
struct fw_com *fc = IFP2FWC(ifp);
@@ -230,7 +230,7 @@
/*
* No fragmentation is necessary.
*/
- M_PREPEND(m, sizeof(uint32_t), M_DONTWAIT);
+ M_PREPEND(m, sizeof(uint32_t), M_NOWAIT);
if (!m) {
error = ENOBUFS;
goto bad;
@@ -262,7 +262,7 @@
* Split off the tail segment from the
* datagram, copying our tags over.
*/
- mtail = m_split(m, fsize, M_DONTWAIT);
+ mtail = m_split(m, fsize, M_NOWAIT);
m_tag_copy_chain(mtail, m, M_NOWAIT);
} else {
mtail = 0;
@@ -272,7 +272,7 @@
* Add our encapsulation header to this
* fragment and hand it off to the link.
*/
- M_PREPEND(m, 2*sizeof(uint32_t), M_DONTWAIT);
+ M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT);
if (!m) {
error = ENOBUFS;
goto bad;
@@ -657,13 +657,8 @@
break;
case SIOCGIFADDR:
- {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *) & ifr->ifr_data;
- bcopy(&IFP2FWC(ifp)->fc_hwaddr,
- (caddr_t) sa->sa_data, sizeof(struct fw_hwaddr));
- }
+ bcopy(&IFP2FWC(ifp)->fc_hwaddr, &ifr->ifr_addr.sa_data[0],
+ sizeof(struct fw_hwaddr));
break;
case SIOCSIFMTU:
Modified: trunk/sys/net/if_gif.c
===================================================================
--- trunk/sys/net/if_gif.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gif.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,7 +1,4 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/if_gif.c 248085 2013-03-09 02:36:32Z marius $ */
-/* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
-
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
@@ -29,8 +26,13 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_gif.c 293411 2016-01-08 02:59:56Z araujo $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -38,11 +40,14 @@
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/sysctl.h>
@@ -54,6 +59,7 @@
#include <machine/cpu.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -64,9 +70,9 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/ip_ecn.h>
#ifdef INET
#include <netinet/in_var.h>
-#include <netinet/in_gif.h>
#include <netinet/ip_var.h>
#endif /* INET */
@@ -76,9 +82,9 @@
#endif
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
+#include <netinet6/ip6_ecn.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
-#include <netinet6/in6_gif.h>
#include <netinet6/ip6protosw.h>
#endif /* INET6 */
@@ -89,27 +95,42 @@
#include <security/mac/mac_framework.h>
-#define GIFNAME "gif"
+static const char gifname[] = "gif";
/*
- * gif_mtx protects the global gif_softc_list.
+ * gif_mtx protects a per-vnet gif_softc_list.
*/
-static struct mtx gif_mtx;
+static VNET_DEFINE(struct mtx, gif_mtx);
+#define V_gif_mtx VNET(gif_mtx)
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define V_gif_softc_list VNET(gif_softc_list)
+static struct sx gif_ioctl_sx;
+SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
+#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \
+ NULL, MTX_DEF)
+#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx)
+#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx)
+#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx)
+
void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void (*ng_gif_attach_p)(struct ifnet *ifp);
void (*ng_gif_detach_p)(struct ifnet *ifp);
-static void gif_start(struct ifnet *);
+static int gif_check_nesting(struct ifnet *, struct mbuf *);
+static int gif_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gif_delete_tunnel(struct ifnet *);
+static int gif_ioctl(struct ifnet *, u_long, caddr_t);
+static int gif_transmit(struct ifnet *, struct mbuf *);
+static void gif_qflush(struct ifnet *);
static int gif_clone_create(struct if_clone *, int, caddr_t);
static void gif_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, gif_cloner);
+#define V_gif_cloner VNET(gif_cloner)
-IFC_SIMPLE_DECLARE(gif, 0);
-
static int gifmodevent(module_t, int, void *);
SYSCTL_DECL(_net_link);
@@ -154,10 +175,7 @@
#endif
static int
-gif_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
+gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct gif_softc *sc;
@@ -164,19 +182,10 @@
sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
sc->gif_fibnum = curthread->td_proc->p_fibnum;
GIF2IFP(sc) = if_alloc(IFT_GIF);
- if (GIF2IFP(sc) == NULL) {
- free(sc, M_GIF);
- return (ENOSPC);
- }
-
GIF_LOCK_INIT(sc);
-
GIF2IFP(sc)->if_softc = sc;
- if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
+ if_initname(GIF2IFP(sc), gifname, unit);
- sc->encap_cookie4 = sc->encap_cookie6 = NULL;
- sc->gif_options = GIF_ACCEPT_REVETHIP;
-
GIF2IFP(sc)->if_addrlen = 0;
GIF2IFP(sc)->if_mtu = GIF_MTU;
GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
@@ -185,56 +194,42 @@
GIF2IFP(sc)->if_flags |= IFF_LINK2;
#endif
GIF2IFP(sc)->if_ioctl = gif_ioctl;
- GIF2IFP(sc)->if_start = gif_start;
+ GIF2IFP(sc)->if_transmit = gif_transmit;
+ GIF2IFP(sc)->if_qflush = gif_qflush;
GIF2IFP(sc)->if_output = gif_output;
- GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
+ GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+ GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(GIF2IFP(sc));
bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
if (ng_gif_attach_p != NULL)
(*ng_gif_attach_p)(GIF2IFP(sc));
- mtx_lock(&gif_mtx);
+ GIF_LIST_LOCK();
LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
- mtx_unlock(&gif_mtx);
-
+ GIF_LIST_UNLOCK();
return (0);
}
static void
-gif_clone_destroy(ifp)
- struct ifnet *ifp;
+gif_clone_destroy(struct ifnet *ifp)
{
-#if defined(INET) || defined(INET6)
- int err;
-#endif
- struct gif_softc *sc = ifp->if_softc;
+ struct gif_softc *sc;
- mtx_lock(&gif_mtx);
+ sx_xlock(&gif_ioctl_sx);
+ sc = ifp->if_softc;
+ gif_delete_tunnel(ifp);
+ GIF_LIST_LOCK();
LIST_REMOVE(sc, gif_list);
- mtx_unlock(&gif_mtx);
-
- gif_delete_tunnel(ifp);
-#ifdef INET6
- if (sc->encap_cookie6 != NULL) {
- err = encap_detach(sc->encap_cookie6);
- KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
- }
-#endif
-#ifdef INET
- if (sc->encap_cookie4 != NULL) {
- err = encap_detach(sc->encap_cookie4);
- KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
- }
-#endif
-
+ GIF_LIST_UNLOCK();
if (ng_gif_detach_p != NULL)
(*ng_gif_detach_p)(ifp);
bpfdetach(ifp);
if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gif_ioctl_sx);
+
if_free(ifp);
-
GIF_LOCK_DESTROY(sc);
-
free(sc, M_GIF);
}
@@ -243,31 +238,35 @@
{
LIST_INIT(&V_gif_softc_list);
+ GIF_LIST_LOCK_INIT();
+ V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
+ gif_clone_destroy, 0);
}
-VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init,
- NULL);
+VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gif_init, NULL);
+static void
+vnet_gif_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_gif_cloner);
+ GIF_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gif_uninit, NULL);
+
static int
-gifmodevent(mod, type, data)
- module_t mod;
- int type;
- void *data;
+gifmodevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
- if_clone_attach(&gif_cloner);
- break;
-
case MOD_UNLOAD:
- if_clone_detach(&gif_cloner);
- mtx_destroy(&gif_mtx);
break;
default:
- return EOPNOTSUPP;
+ return (EOPNOTSUPP);
}
- return 0;
+ return (0);
}
static moduledata_t gif_mod = {
@@ -280,113 +279,192 @@
MODULE_VERSION(if_gif, 1);
int
-gif_encapcheck(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- struct ip ip;
+ GIF_RLOCK_TRACKER;
struct gif_softc *sc;
+ int ret;
+ uint8_t ver;
sc = (struct gif_softc *)arg;
- if (sc == NULL)
- return 0;
+ if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
- if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
- return 0;
+ ret = 0;
+ GIF_RLOCK(sc);
/* no physical address */
- if (!sc->gif_psrc || !sc->gif_pdst)
- return 0;
+ if (sc->gif_family == 0)
+ goto done;
switch (proto) {
#ifdef INET
case IPPROTO_IPV4:
- break;
#endif
#ifdef INET6
case IPPROTO_IPV6:
- break;
#endif
case IPPROTO_ETHERIP:
break;
-
default:
- return 0;
+ goto done;
}
/* Bail on short packets */
- if (m->m_pkthdr.len < sizeof(ip))
- return 0;
+ if (m->m_pkthdr.len < sizeof(struct ip))
+ goto done;
- m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
-
- switch (ip.ip_v) {
+ m_copydata(m, 0, 1, &ver);
+ switch (ver >> 4) {
#ifdef INET
case 4:
- if (sc->gif_psrc->sa_family != AF_INET ||
- sc->gif_pdst->sa_family != AF_INET)
- return 0;
- return gif_encapcheck4(m, off, proto, arg);
+ if (sc->gif_family != AF_INET)
+ goto done;
+ ret = in_gif_encapcheck(m, off, proto, arg);
+ break;
#endif
#ifdef INET6
case 6:
if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
- return 0;
- if (sc->gif_psrc->sa_family != AF_INET6 ||
- sc->gif_pdst->sa_family != AF_INET6)
- return 0;
- return gif_encapcheck6(m, off, proto, arg);
+ goto done;
+ if (sc->gif_family != AF_INET6)
+ goto done;
+ ret = in6_gif_encapcheck(m, off, proto, arg);
+ break;
#endif
- default:
- return 0;
}
+done:
+ GIF_RUNLOCK(sc);
+ return (ret);
}
-static void
-gif_start(struct ifnet *ifp)
+static int
+gif_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct gif_softc *sc;
- struct mbuf *m;
+ struct etherip_header *eth;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ uint32_t t;
+#endif
+ uint32_t af;
+ uint8_t proto, ecn;
+ int error;
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error) {
+ m_freem(m);
+ goto err;
+ }
+#endif
+ error = ENETDOWN;
sc = ifp->if_softc;
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0 ||
+ sc->gif_family == 0 ||
+ (error = gif_check_nesting(ifp, m)) != 0) {
+ m_freem(m);
+ goto err;
+ }
+ /* Now pull back the af that we stashed in the csum_data. */
+ if (ifp->if_bridge)
+ af = AF_LINK;
+ else
+ af = m->m_pkthdr.csum_data;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ M_SETFIB(m, sc->gif_fibnum);
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
+ /* inner AF-specific encapsulation */
+ ecn = 0;
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ proto = IPPROTO_IPV4;
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ ip = mtod(m, struct ip *);
+ ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &ecn, &ip->ip_tos);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ proto = IPPROTO_IPV6;
+ if (m->m_len < sizeof(struct ip6_hdr))
+ m = m_pullup(m, sizeof(struct ip6_hdr));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ t = 0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &t, &ip6->ip6_flow);
+ ecn = (ntohl(t) >> 20) & 0xff;
+ break;
+#endif
+ case AF_LINK:
+ proto = IPPROTO_ETHERIP;
+ M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ eth = mtod(m, struct etherip_header *);
+ eth->eip_resvh = 0;
+ eth->eip_ver = ETHERIP_VERSION;
+ eth->eip_resvl = 0;
+ break;
+ default:
+ error = EAFNOSUPPORT;
+ m_freem(m);
+ goto err;
+ }
+ /* XXX should we check if our outer source is legal? */
+ /* dispatch to output logic based on outer AF */
+ switch (sc->gif_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gif_output(ifp, m, proto, ecn);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gif_output(ifp, m, proto, ecn);
+ break;
+#endif
+ default:
+ m_freem(m);
+ }
+err:
+ if (error)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
+}
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- for (;;) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
+static void
+gif_qflush(struct ifnet *ifp __unused)
+{
- gif_output(ifp, m, sc->gif_pdst, NULL);
-
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-
- return;
}
-int
-gif_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+#define MTAG_GIF 1080679712
+static int
+gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
{
- struct gif_softc *sc = ifp->if_softc;
struct m_tag *mtag;
- int error = 0;
- int gif_called;
- u_int32_t af;
+ int count;
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error) {
- m_freem(m);
- goto end;
- }
-#endif
-
/*
* gif may cause infinite recursion calls when misconfigured.
* We'll prevent this by detecting loops.
@@ -394,105 +472,63 @@
* High nesting level may cause stack exhaustion.
* We'll prevent this by introducing upper limit.
*/
- gif_called = 1;
- mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
- while (mtag != NULL) {
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
if (*(struct ifnet **)(mtag + 1) == ifp) {
- log(LOG_NOTICE,
- "gif_output: loop detected on %s\n",
- (*(struct ifnet **)(mtag + 1))->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
+ log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+ return (EIO);
}
- mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
- gif_called++;
+ count++;
}
- if (gif_called > V_max_gif_nesting) {
+ if (count > V_max_gif_nesting) {
log(LOG_NOTICE,
- "gif_output: recursively called too many times(%d)\n",
- gif_called);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
+ "%s: if_output recursively called too many times(%d)\n",
+ if_name(ifp), count);
+ return (EIO);
}
- mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
+ mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
*(struct ifnet **)(mtag + 1) = ifp;
m_tag_prepend(m, mtag);
+ return (0);
+}
- m->m_flags &= ~(M_BCAST|M_MCAST);
+int
+gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
- GIF_LOCK(sc);
-
- if (!(ifp->if_flags & IFF_UP) ||
- sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
- GIF_UNLOCK(sc);
- m_freem(m);
- error = ENETDOWN;
- goto end;
- }
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
+ /*
+ * Now save the af in the inbound pkt csum data, this is a cheat since
+ * we are using the inbound csum_data field to carry the af over to
+ * the gif_transmit() routine, avoiding using yet another mtag.
+ */
+ m->m_pkthdr.csum_data = af;
+ return (ifp->if_transmit(ifp, m));
+}
- af = dst->sa_family;
- BPF_MTAP2(ifp, &af, sizeof(af), m);
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
-
- /* override to IPPROTO_ETHERIP for bridged traffic */
- if (ifp->if_bridge)
- af = AF_LINK;
-
- M_SETFIB(m, sc->gif_fibnum);
- /* inner AF-specific encapsulation */
-
- /* XXX should we check if our outer source is legal? */
-
- /* dispatch to output logic based on outer AF */
- switch (sc->gif_psrc->sa_family) {
+void
+gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
+{
+ struct etherip_header *eip;
#ifdef INET
- case AF_INET:
- error = in_gif_output(ifp, af, m);
- break;
+ struct ip *ip;
#endif
#ifdef INET6
- case AF_INET6:
- error = in6_gif_output(ifp, af, m);
- break;
+ struct ip6_hdr *ip6;
+ uint32_t t;
#endif
- default:
- m_freem(m);
- error = ENETDOWN;
- }
-
- GIF_UNLOCK(sc);
- end:
- if (error)
- ifp->if_oerrors++;
- return (error);
-}
-
-void
-gif_input(m, af, ifp)
- struct mbuf *m;
- int af;
- struct ifnet *ifp;
-{
- int isr, n;
struct gif_softc *sc;
- struct etherip_header *eip;
struct ether_header *eh;
struct ifnet *oldifp;
+ int isr, n, af;
if (ifp == NULL) {
/* just in case */
@@ -501,6 +537,46 @@
}
sc = ifp->if_softc;
m->m_pkthdr.rcvif = ifp;
+ m_clrprotoflags(m);
+ switch (proto) {
+#ifdef INET
+ case IPPROTO_IPV4:
+ af = AF_INET;
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL)
+ goto drop;
+ ip = mtod(m, struct ip *);
+ if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
+ m_freem(m);
+ goto drop;
+ }
+ break;
+#endif
+#ifdef INET6
+ case IPPROTO_IPV6:
+ af = AF_INET6;
+ if (m->m_len < sizeof(struct ip6_hdr))
+ m = m_pullup(m, sizeof(struct ip6_hdr));
+ if (m == NULL)
+ goto drop;
+ t = htonl((uint32_t)ecn << 20);
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
+ m_freem(m);
+ goto drop;
+ }
+ break;
+#endif
+ case IPPROTO_ETHERIP:
+ af = AF_LINK;
+ break;
+ default:
+ m_freem(m);
+ goto drop;
+ }
#ifdef MAC
mac_ifnet_create_mbuf(ifp, m);
@@ -507,14 +583,21 @@
#endif
if (bpf_peers_present(ifp->if_bpf)) {
- u_int32_t af1 = af;
+ uint32_t af1 = af;
bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
}
+ if ((ifp->if_flags & IFF_MONITOR) != 0) {
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ m_freem(m);
+ return;
+ }
+
if (ng_gif_input_p != NULL) {
(*ng_gif_input_p)(ifp, &m, af);
if (m == NULL)
- return;
+ goto drop;
}
/*
@@ -541,34 +624,15 @@
#endif
case AF_LINK:
n = sizeof(struct etherip_header) + sizeof(struct ether_header);
- if (n > m->m_len) {
+ if (n > m->m_len)
m = m_pullup(m, n);
- if (m == NULL) {
- ifp->if_ierrors++;
- return;
- }
- }
-
+ if (m == NULL)
+ goto drop;
eip = mtod(m, struct etherip_header *);
- /*
- * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
- * accepts an EtherIP packet with revered version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if (sc->gif_options & GIF_ACCEPT_REVETHIP) {
- if (eip->eip_resvl != ETHERIP_VERSION
- && eip->eip_ver != ETHERIP_VERSION) {
- /* discard unknown versions */
- m_freem(m);
- return;
- }
- } else {
- if (eip->eip_ver != ETHERIP_VERSION) {
- /* discard unknown versions */
- m_freem(m);
- return;
- }
+ if (eip->eip_ver != ETHERIP_VERSION) {
+ /* discard unknown versions */
+ m_freem(m);
+ goto drop;
}
m_adj(m, sizeof(struct etherip_header));
@@ -583,7 +647,7 @@
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
BRIDGE_INPUT(ifp, m);
@@ -608,59 +672,61 @@
return;
}
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
+ return;
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
int
-gif_ioctl(ifp, cmd, data)
- struct ifnet *ifp;
- u_long cmd;
- caddr_t data;
+gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct gif_softc *sc = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq*)data;
- int error = 0, size;
- u_int options;
+ GIF_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq*)data;
struct sockaddr *dst, *src;
-#ifdef SIOCSIFMTU /* xxx */
- u_long mtu;
+ struct gif_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ u_int options;
+ int error;
switch (cmd) {
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
- break;
-
- case SIOCSIFDSTADDR:
- break;
-
case SIOCADDMULTI:
case SIOCDELMULTI:
- break;
-
-#ifdef SIOCSIFMTU /* xxx */
case SIOCGIFMTU:
- break;
-
+ case SIOCSIFFLAGS:
+ return (0);
case SIOCSIFMTU:
- mtu = ifr->ifr_mtu;
- if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
+ if (ifr->ifr_mtu < GIF_MTU_MIN ||
+ ifr->ifr_mtu > GIF_MTU_MAX)
return (EINVAL);
- ifp->if_mtu = mtu;
- break;
-#endif /* SIOCSIFMTU */
-
-#ifdef INET
+ else
+ ifp->if_mtu = ifr->ifr_mtu;
+ return (0);
+ }
+ sx_xlock(&gif_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ error = 0;
+ switch (cmd) {
case SIOCSIFPHYADDR:
-#endif
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
-#endif /* INET6 */
- case SIOCSLIFPHYADDR:
+#endif
+ error = EINVAL;
switch (cmd) {
#ifdef INET
case SIOCSIFPHYADDR:
@@ -678,199 +744,169 @@
&(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
- case SIOCSLIFPHYADDR:
- src = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->addr);
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->dstaddr);
- break;
default:
- return EINVAL;
+ goto bad;
}
-
/* sa_family must be equal */
- if (src->sa_family != dst->sa_family)
- return EINVAL;
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto bad;
/* validate sa_len */
+ /* check sa_family looks sane for the cmd */
switch (src->sa_family) {
#ifdef INET
case AF_INET:
if (src->sa_len != sizeof(struct sockaddr_in))
- return EINVAL;
+ goto bad;
+ if (cmd != SIOCSIFPHYADDR) {
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
break;
#endif
#ifdef INET6
case AF_INET6:
if (src->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
+ goto bad;
+ if (cmd != SIOCSIFPHYADDR_IN6) {
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+ error = EADDRNOTAVAIL;
+ if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+ ||
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto bad;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto bad;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto bad;
break;
#endif
default:
- return EAFNOSUPPORT;
+ error = EAFNOSUPPORT;
+ goto bad;
}
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (dst->sa_len != sizeof(struct sockaddr_in))
- return EINVAL;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (dst->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
- break;
-#endif
- default:
- return EAFNOSUPPORT;
- }
-
- /* check sa_family looks sane for the cmd */
- switch (cmd) {
- case SIOCSIFPHYADDR:
- if (src->sa_family == AF_INET)
- break;
- return EAFNOSUPPORT;
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
- if (src->sa_family == AF_INET6)
- break;
- return EAFNOSUPPORT;
-#endif /* INET6 */
- case SIOCSLIFPHYADDR:
- /* checks done in the above */
- break;
- }
-
- error = gif_set_tunnel(GIF2IFP(sc), src, dst);
+ error = gif_set_tunnel(ifp, src, dst);
break;
-
-#ifdef SIOCDIFPHYADDR
case SIOCDIFPHYADDR:
- gif_delete_tunnel(GIF2IFP(sc));
+ gif_delete_tunnel(ifp);
break;
-#endif
-
case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
-#endif /* INET6 */
- if (sc->gif_psrc == NULL) {
+ case SIOCGIFPDSTADDR_IN6:
+#endif
+ if (sc->gif_family == 0) {
error = EADDRNOTAVAIL;
- goto bad;
+ break;
}
- src = sc->gif_psrc;
+ GIF_RLOCK(sc);
switch (cmd) {
#ifdef INET
case SIOCGIFPSRCADDR:
- dst = &ifr->ifr_addr;
- size = sizeof(ifr->ifr_addr);
+ case SIOCGIFPDSTADDR:
+ if (sc->gif_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
-#endif /* INET */
+#endif
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
- dst = (struct sockaddr *)
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gif_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
&(((struct in6_ifreq *)data)->ifr_addr);
- size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
-#endif /* INET6 */
+#endif
default:
- error = EADDRNOTAVAIL;
- goto bad;
+ error = EAFNOSUPPORT;
}
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
+ if (error == 0) {
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gif_iphdr->ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gif_iphdr->ip_dst;
+ break;
+#endif
#ifdef INET6
- if (dst->sa_family == AF_INET6) {
- error = sa6_recoverscope((struct sockaddr_in6 *)dst);
- if (error != 0)
- return (error);
- }
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
+ break;
#endif
- break;
-
- case SIOCGIFPDSTADDR:
-#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
-#endif /* INET6 */
- if (sc->gif_pdst == NULL) {
- error = EADDRNOTAVAIL;
- goto bad;
+ }
}
- src = sc->gif_pdst;
+ GIF_RUNLOCK(sc);
+ if (error != 0)
+ break;
switch (cmd) {
#ifdef INET
+ case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
- dst = &ifr->ifr_addr;
- size = sizeof(ifr->ifr_addr);
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
break;
-#endif /* INET */
+#endif
#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
- dst = (struct sockaddr *)
- &(((struct in6_ifreq *)data)->ifr_addr);
- size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
- break;
-#endif /* INET6 */
- default:
- error = EADDRNOTAVAIL;
- goto bad;
- }
- if (src->sa_len > size)
- return EINVAL;
- error = prison_if(curthread->td_ucred, src);
- if (error != 0)
- return (error);
- error = prison_if(curthread->td_ucred, dst);
- if (error != 0)
- return (error);
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-#ifdef INET6
- if (dst->sa_family == AF_INET6) {
- error = sa6_recoverscope((struct sockaddr_in6 *)dst);
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
if (error != 0)
- return (error);
- }
+ memset(sin6, 0, sizeof(*sin6));
#endif
- break;
-
- case SIOCGLIFPHYADDR:
- if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
- error = EADDRNOTAVAIL;
- goto bad;
}
-
- /* copy src */
- src = sc->gif_psrc;
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->addr);
- size = sizeof(((struct if_laddrreq *)data)->addr);
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-
- /* copy dst */
- src = sc->gif_pdst;
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->dstaddr);
- size = sizeof(((struct if_laddrreq *)data)->dstaddr);
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
break;
-
- case SIOCSIFFLAGS:
- /* if_ioctl() takes care of it */
+ case SIOCGTUNFIB:
+ ifr->ifr_fib = sc->gif_fibnum;
break;
-
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
+ break;
+ if (ifr->ifr_fib >= rt_numfibs)
+ error = EINVAL;
+ else
+ sc->gif_fibnum = ifr->ifr_fib;
+ break;
case GIFGOPTS:
options = sc->gif_options;
- error = copyout(&options, ifr->ifr_data,
- sizeof(options));
+ error = copyout(&options, ifr->ifr_data, sizeof(options));
break;
-
case GIFSOPTS:
if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
break;
@@ -882,151 +918,154 @@
else
sc->gif_options = options;
break;
-
default:
error = EINVAL;
break;
}
- bad:
- return error;
+bad:
+ sx_xunlock(&gif_ioctl_sx);
+ return (error);
}
-/*
- * XXXRW: There's a general event-ordering issue here: the code to check
- * if a given tunnel is already present happens before we perform a
- * potentially blocking setup of the tunnel. This code needs to be
- * re-ordered so that the check and replacement can be atomic using
- * a mutex.
- */
-int
-gif_set_tunnel(ifp, src, dst)
- struct ifnet *ifp;
- struct sockaddr *src;
- struct sockaddr *dst;
+static void
+gif_detach(struct gif_softc *sc)
{
- struct gif_softc *sc = ifp->if_softc;
- struct gif_softc *sc2;
- struct sockaddr *osrc, *odst, *sa;
- int error = 0;
- mtx_lock(&gif_mtx);
- LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
- if (sc2 == sc)
- continue;
- if (!sc2->gif_pdst || !sc2->gif_psrc)
- continue;
- if (sc2->gif_pdst->sa_family != dst->sa_family ||
- sc2->gif_pdst->sa_len != dst->sa_len ||
- sc2->gif_psrc->sa_family != src->sa_family ||
- sc2->gif_psrc->sa_len != src->sa_len)
- continue;
+ sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+ if (sc->gif_ecookie != NULL)
+ encap_detach(sc->gif_ecookie);
+ sc->gif_ecookie = NULL;
+}
- /*
- * Disallow parallel tunnels unless instructed
- * otherwise.
- */
- if (!V_parallel_tunnels &&
- bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
- bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
- error = EADDRNOTAVAIL;
- mtx_unlock(&gif_mtx);
- goto bad;
- }
+static int
+gif_attach(struct gif_softc *sc, int af)
+{
- /* XXX both end must be valid? (I mean, not 0.0.0.0) */
+ sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ return (in_gif_attach(sc));
+#endif
+#ifdef INET6
+ case AF_INET6:
+ return (in6_gif_attach(sc));
+#endif
}
- mtx_unlock(&gif_mtx);
+ return (EAFNOSUPPORT);
+}
- /* XXX we can detach from both, but be polite just in case */
- if (sc->gif_psrc)
- switch (sc->gif_psrc->sa_family) {
+static int
+gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
+{
+ struct gif_softc *sc = ifp->if_softc;
+ struct gif_softc *tsc;
#ifdef INET
- case AF_INET:
- (void)in_gif_detach(sc);
- break;
+ struct ip *ip;
#endif
#ifdef INET6
- case AF_INET6:
- (void)in6_gif_detach(sc);
- break;
+ struct ip6_hdr *ip6;
#endif
- }
+ void *hdr;
+ int error = 0;
- osrc = sc->gif_psrc;
- sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
- bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
- sc->gif_psrc = sa;
-
- odst = sc->gif_pdst;
- sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
- bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
- sc->gif_pdst = sa;
-
- switch (sc->gif_psrc->sa_family) {
+ if (sc == NULL)
+ return (ENXIO);
+ /* Disallow parallel tunnels unless instructed otherwise. */
+ if (V_parallel_tunnels == 0) {
+ GIF_LIST_LOCK();
+ LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
+ if (tsc == sc || tsc->gif_family != src->sa_family)
+ continue;
#ifdef INET
+ if (tsc->gif_family == AF_INET &&
+ tsc->gif_iphdr->ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gif_iphdr->ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ error = EADDRNOTAVAIL;
+ GIF_LIST_UNLOCK();
+ goto bad;
+ }
+#endif
+#ifdef INET6
+ if (tsc->gif_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ error = EADDRNOTAVAIL;
+ GIF_LIST_UNLOCK();
+ goto bad;
+ }
+#endif
+ }
+ GIF_LIST_UNLOCK();
+ }
+ switch (src->sa_family) {
+#ifdef INET
case AF_INET:
- error = in_gif_attach(sc);
+ hdr = ip = malloc(sizeof(struct ip), M_GIF,
+ M_WAITOK | M_ZERO);
+ ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
+ ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
break;
#endif
#ifdef INET6
case AF_INET6:
- /*
- * Check validity of the scope zone ID of the addresses, and
- * convert it into the kernel internal form if necessary.
- */
- error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
- if (error != 0)
- break;
- error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
- if (error != 0)
- break;
- error = in6_gif_attach(sc);
+ hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
+ M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ ip6->ip6_vfc = IPV6_VERSION;
break;
#endif
- }
- if (error) {
- /* rollback */
- free((caddr_t)sc->gif_psrc, M_IFADDR);
- free((caddr_t)sc->gif_pdst, M_IFADDR);
- sc->gif_psrc = osrc;
- sc->gif_pdst = odst;
- goto bad;
- }
+ default:
+ return (EAFNOSUPPORT);
+ };
- if (osrc)
- free((caddr_t)osrc, M_IFADDR);
- if (odst)
- free((caddr_t)odst, M_IFADDR);
+ if (sc->gif_family != src->sa_family)
+ gif_detach(sc);
+ if (sc->gif_family == 0 ||
+ sc->gif_family != src->sa_family)
+ error = gif_attach(sc, src->sa_family);
- bad:
- if (sc->gif_psrc && sc->gif_pdst)
+ GIF_WLOCK(sc);
+ if (sc->gif_family != 0)
+ free(sc->gif_hdr, M_GIF);
+ sc->gif_family = src->sa_family;
+ sc->gif_hdr = hdr;
+ GIF_WUNLOCK(sc);
+#if defined(INET) || defined(INET6)
+bad:
+#endif
+ if (error == 0 && sc->gif_family != 0) {
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
+ if_link_state_change(ifp, LINK_STATE_UP);
+ } else {
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-
- return error;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ }
+ return (error);
}
-void
-gif_delete_tunnel(ifp)
- struct ifnet *ifp;
+static void
+gif_delete_tunnel(struct ifnet *ifp)
{
struct gif_softc *sc = ifp->if_softc;
+ int family;
- if (sc->gif_psrc) {
- free((caddr_t)sc->gif_psrc, M_IFADDR);
- sc->gif_psrc = NULL;
+ if (sc == NULL)
+ return;
+
+ GIF_WLOCK(sc);
+ family = sc->gif_family;
+ sc->gif_family = 0;
+ GIF_WUNLOCK(sc);
+ if (family != 0) {
+ gif_detach(sc);
+ free(sc->gif_hdr, M_GIF);
}
- if (sc->gif_pdst) {
- free((caddr_t)sc->gif_pdst, M_IFADDR);
- sc->gif_pdst = NULL;
- }
- /* it is safe to detach from both */
-#ifdef INET
- (void)in_gif_detach(sc);
-#endif
-#ifdef INET6
- (void)in6_gif_detach(sc);
-#endif
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
}
Modified: trunk/sys/net/if_gif.h
===================================================================
--- trunk/sys/net/if_gif.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gif.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/if_gif.h 207369 2010-04-29 11:52:42Z bz $ */
+/* $FreeBSD: stable/10/sys/net/if_gif.h 287730 2015-09-13 01:35:40Z hrs $ */
/* $KAME: if_gif.h,v 1.17 2000/09/11 11:36:41 sumikawa Exp $ */
/*-
@@ -31,21 +31,17 @@
* SUCH DAMAGE.
*/
-/*
- * if_gif.h
- */
-
#ifndef _NET_IF_GIF_H_
#define _NET_IF_GIF_H_
-
#ifdef _KERNEL
#include "opt_inet.h"
#include "opt_inet6.h"
#include <netinet/in.h>
-/* xxx sigh, why route have struct route instead of pointer? */
+struct ip;
+struct ip6_hdr;
struct encaptab;
extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
@@ -57,35 +53,38 @@
extern void (*ng_gif_detach_p)(struct ifnet *ifp);
struct gif_softc {
- struct ifnet *gif_ifp;
- struct mtx gif_mtx;
- struct sockaddr *gif_psrc; /* Physical src addr */
- struct sockaddr *gif_pdst; /* Physical dst addr */
+ struct ifnet *gif_ifp;
+ struct rmlock gif_lock;
+ const struct encaptab *gif_ecookie;
+ int gif_family;
+ int gif_flags;
+ u_int gif_fibnum;
+ u_int gif_options;
+ void *gif_netgraph; /* netgraph node info */
union {
- struct route gifscr_ro; /* xxx */
+ void *hdr;
+ struct ip *iphdr;
#ifdef INET6
- struct route_in6 gifscr_ro6; /* xxx */
+ struct ip6_hdr *ip6hdr;
#endif
- } gifsc_gifscr;
- int gif_flags;
- u_int gif_fibnum;
- const struct encaptab *encap_cookie4;
- const struct encaptab *encap_cookie6;
- void *gif_netgraph; /* ng_gif(4) netgraph node info */
- u_int gif_options;
- LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
+ } gif_uhdr;
+ LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
};
#define GIF2IFP(sc) ((sc)->gif_ifp)
-#define GIF_LOCK_INIT(sc) mtx_init(&(sc)->gif_mtx, "gif softc", \
- NULL, MTX_DEF)
-#define GIF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->gif_mtx)
-#define GIF_LOCK(sc) mtx_lock(&(sc)->gif_mtx)
-#define GIF_UNLOCK(sc) mtx_unlock(&(sc)->gif_mtx)
-#define GIF_LOCK_ASSERT(sc) mtx_assert(&(sc)->gif_mtx, MA_OWNED)
+#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc")
+#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock)
+#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker
+#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker)
+#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker)
+#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED)
+#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock)
+#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock)
+#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED)
-#define gif_ro gifsc_gifscr.gifscr_ro
+#define gif_iphdr gif_uhdr.iphdr
+#define gif_hdr gif_uhdr.hdr
#ifdef INET6
-#define gif_ro6 gifsc_gifscr.gifscr_ro6
+#define gif_ip6hdr gif_uhdr.ip6hdr
#endif
#define GIF_MTU (1280) /* Default MTU */
@@ -92,9 +91,6 @@
#define GIF_MTU_MIN (1280) /* Minimum MTU */
#define GIF_MTU_MAX (8192) /* Maximum MTU */
-#define MTAG_GIF 1080679712
-#define MTAG_GIF_CALLED 0
-
struct etherip_header {
#if BYTE_ORDER == LITTLE_ENDIAN
u_int eip_resvl:4, /* reserved */
@@ -112,20 +108,26 @@
#define ETHERIP_ALIGN 2
/* Prototypes */
-void gif_input(struct mbuf *, int, struct ifnet *);
-int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
+int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
-int gif_ioctl(struct ifnet *, u_long, caddr_t);
-int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *);
-void gif_delete_tunnel(struct ifnet *);
int gif_encapcheck(const struct mbuf *, int, int, void *);
+#ifdef INET
+int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in_gif_attach(struct gif_softc *);
+#endif
+#ifdef INET6
+int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in6_gif_attach(struct gif_softc *);
+#endif
#endif /* _KERNEL */
#define GIFGOPTS _IOWR('i', 150, struct ifreq)
#define GIFSOPTS _IOW('i', 151, struct ifreq)
-#define GIF_ACCEPT_REVETHIP 0x0001
-#define GIF_SEND_REVETHIP 0x0010
-#define GIF_OPTMASK (GIF_ACCEPT_REVETHIP|GIF_SEND_REVETHIP)
+#define GIF_IGNORE_SOURCE 0x0002
+#define GIF_OPTMASK (GIF_IGNORE_SOURCE)
#endif /* _NET_IF_GIF_H_ */
Modified: trunk/sys/net/if_gre.c
===================================================================
--- trunk/sys/net/if_gre.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gre.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,9 +1,7 @@
/* $MidnightBSD$ */
-/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
-/* $FreeBSD: stable/9/sys/net/if_gre.c 248085 2013-03-09 02:36:32Z marius $ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -31,18 +29,13 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
*/
-/*
- * Encapsulate L3 protocols into IP
- * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
- * If_gre is compatible with Cisco GRE tunnels, so you can
- * have a NetBSD box as the other end of a tunnel interface of a Cisco
- * router. See gre(4) for more details.
- * Also supported: IP in IP encaps (proto 55) as of RFC 2004
- */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_gre.c 293410 2016-01-08 02:58:10Z araujo $");
-#include "opt_atalk.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -49,6 +42,7 @@
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -56,98 +50,77 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
+#include <net/if_var.h>
#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/vnet.h>
#include <net/route.h>
-#include <net/vnet.h>
+#include <netinet/in.h>
#ifdef INET
-#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
-#include <netinet/ip_gre.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_encap.h>
-#else
-#error "Huh? if_gre without inet?"
#endif
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#endif
+
+#include <netinet/ip_encap.h>
#include <net/bpf.h>
-
#include <net/if_gre.h>
-/*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
- */
-#define GREMTU 1476
+#include <machine/in_cksum.h>
-#define GRENAME "gre"
+#include <security/mac/mac_framework.h>
+#define GREMTU 1500
+static const char grename[] = "gre";
+static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
+static VNET_DEFINE(struct mtx, gre_mtx);
+#define V_gre_mtx VNET(gre_mtx)
+#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
+ MTX_DEF)
+#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
+#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
+#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
-#define MTAG_COOKIE_GRE 1307983903
-#define MTAG_GRE_NESTING 1
-struct mtag_gre_nesting {
- uint16_t count;
- uint16_t max;
- struct ifnet *ifp[];
-};
+static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
+#define V_gre_softc_list VNET(gre_softc_list)
+static struct sx gre_ioctl_sx;
+SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
-/*
- * gre_mtx protects all global variables in if_gre.c.
- * XXX: gre_softc data not protected yet.
- */
-struct mtx gre_mtx;
-static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation");
-
-struct gre_softc_head gre_softc_list;
-
static int gre_clone_create(struct if_clone *, int, caddr_t);
static void gre_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, gre_cloner);
+#define V_gre_cloner VNET(gre_cloner)
+
+static void gre_qflush(struct ifnet *);
+static int gre_transmit(struct ifnet *, struct mbuf *);
static int gre_ioctl(struct ifnet *, u_long, caddr_t);
-static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *ro);
+static int gre_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
-IFC_SIMPLE_DECLARE(gre, 0);
+static void gre_updatehdr(struct gre_softc *);
+static int gre_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gre_delete_tunnel(struct ifnet *);
-static int gre_compute_route(struct gre_softc *sc);
-
-static void greattach(void);
-
-#ifdef INET
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_GRE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_input,
- .pr_output = (pr_output_t *)rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-static const struct protosw in_mobile_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_MOBILE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_mobile_input,
- .pr_output = (pr_output_t *)rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-#endif
-
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
"Generic Routing Encapsulation");
@@ -162,795 +135,846 @@
*/
#define MAX_GRE_NEST 1
#endif
-static int max_gre_nesting = MAX_GRE_NEST;
-SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
- &max_gre_nesting, 0, "Max nested tunnels");
-/* ARGSUSED */
+static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
+#define V_max_gre_nesting VNET(max_gre_nesting)
+SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
+
static void
-greattach(void)
+vnet_gre_init(const void *unused __unused)
{
+ LIST_INIT(&V_gre_softc_list);
+ GRE_LIST_LOCK_INIT();
+ V_gre_cloner = if_clone_simple(grename, gre_clone_create,
+ gre_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gre_init, NULL);
- mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
- LIST_INIT(&gre_softc_list);
- if_clone_attach(&gre_cloner);
+static void
+vnet_gre_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_gre_cloner);
+ GRE_LIST_LOCK_DESTROY();
}
+VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gre_uninit, NULL);
static int
-gre_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
+gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct gre_softc *sc;
sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
-
+ sc->gre_fibnum = curthread->td_proc->p_fibnum;
GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
- if (GRE2IFP(sc) == NULL) {
- free(sc, M_GRE);
- return (ENOSPC);
- }
-
+ GRE_LOCK_INIT(sc);
GRE2IFP(sc)->if_softc = sc;
- if_initname(GRE2IFP(sc), ifc->ifc_name, unit);
+ if_initname(GRE2IFP(sc), grename, unit);
- GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
- GRE2IFP(sc)->if_addrlen = 0;
- GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
- GRE2IFP(sc)->if_mtu = GREMTU;
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU;
GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
GRE2IFP(sc)->if_output = gre_output;
GRE2IFP(sc)->if_ioctl = gre_ioctl;
- sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
- sc->g_proto = IPPROTO_GRE;
- GRE2IFP(sc)->if_flags |= IFF_LINK0;
- sc->encap = NULL;
- sc->gre_fibnum = curthread->td_proc->p_fibnum;
- sc->wccp_ver = WCCP_V1;
- sc->key = 0;
+ GRE2IFP(sc)->if_transmit = gre_transmit;
+ GRE2IFP(sc)->if_qflush = gre_qflush;
+ GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+ GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(GRE2IFP(sc));
bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- mtx_lock(&gre_mtx);
- LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
- mtx_unlock(&gre_mtx);
+ GRE_LIST_LOCK();
+ LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
+ GRE_LIST_UNLOCK();
return (0);
}
static void
-gre_clone_destroy(ifp)
- struct ifnet *ifp;
+gre_clone_destroy(struct ifnet *ifp)
{
- struct gre_softc *sc = ifp->if_softc;
+ struct gre_softc *sc;
- mtx_lock(&gre_mtx);
- LIST_REMOVE(sc, sc_list);
- mtx_unlock(&gre_mtx);
-
-#ifdef INET
- if (sc->encap != NULL)
- encap_detach(sc->encap);
-#endif
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ gre_delete_tunnel(ifp);
+ GRE_LIST_LOCK();
+ LIST_REMOVE(sc, gre_list);
+ GRE_LIST_UNLOCK();
bpfdetach(ifp);
if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gre_ioctl_sx);
+
if_free(ifp);
+ GRE_LOCK_DESTROY(sc);
free(sc, M_GRE);
}
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004
- */
static int
-gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- int error = 0;
- struct gre_softc *sc = ifp->if_softc;
- struct greip *gh;
- struct ip *ip;
- struct m_tag *mtag;
- struct mtag_gre_nesting *gt;
- size_t len;
- u_short gre_ip_id = 0;
- uint8_t gre_ip_tos = 0;
- u_int16_t etype = 0;
- struct mobile_h mob_h;
- u_int32_t af;
- int extra = 0, max;
+ GRE_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr *src, *dst;
+ struct gre_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ uint32_t opt;
+ int error;
- /*
- * gre may cause infinite recursion calls when misconfigured. High
- * nesting level may cause stack exhaustion. We'll prevent this by
- * detecting loops and by introducing upper limit.
- */
- mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
- if (mtag != NULL) {
- struct ifnet **ifp2;
-
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- gt->count++;
- if (gt->count > min(gt->max,max_gre_nesting)) {
- printf("%s: hit maximum recursion limit %u on %s\n",
- __func__, gt->count - 1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
-
- ifp2 = gt->ifp;
- for (max = gt->count - 1; max > 0; max--) {
- if (*ifp2 == ifp)
- break;
- ifp2++;
- }
- if (*ifp2 == ifp) {
- printf("%s: detected loop with nexting %u on %s\n",
- __func__, gt->count-1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- *ifp2 = ifp;
-
- } else {
- /*
- * Given that people should NOT increase max_gre_nesting beyond
- * their real needs, we allocate once per packet rather than
- * allocating an mtag once per passing through gre.
- *
- * Note: the sysctl does not actually check for saneness, so we
- * limit the maximum numbers of possible recursions here.
- */
- max = imin(max_gre_nesting, 256);
- /* If someone sets the sysctl <= 0, we want at least 1. */
- max = imax(max, 1);
- len = sizeof(struct mtag_gre_nesting) +
- max * sizeof(struct ifnet *);
- mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- bzero(gt, len);
- gt->count = 1;
- gt->max = max;
- *gt->ifp = ifp;
- m_tag_prepend(m, mtag);
+ switch (cmd) {
+ case SIOCSIFMTU:
+ /* XXX: */
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ break;
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ case GRESADDRS:
+ case GRESADDRD:
+ case GREGADDRS:
+ case GREGADDRD:
+ case GRESPROTO:
+ case GREGPROTO:
+ return (EOPNOTSUPP);
}
-
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
- sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
- m_freem(m);
- error = ENETDOWN;
+ src = dst = NULL;
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
goto end;
}
-
- gh = NULL;
- ip = NULL;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- if (bpf_peers_present(ifp->if_bpf)) {
- af = dst->sa_family;
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
-
- if (sc->g_proto == IPPROTO_MOBILE) {
- if (dst->sa_family == AF_INET) {
- struct mbuf *m0;
- int msiz;
-
- ip = mtod(m, struct ip *);
-
- /*
- * RFC2004 specifies that fragmented diagrams shouldn't
- * be encapsulated.
- */
- if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL; /* is there better errno? */
- goto end;
- }
- memset(&mob_h, 0, MOB_H_SIZ_L);
- mob_h.proto = (ip->ip_p) << 8;
- mob_h.odst = ip->ip_dst.s_addr;
- ip->ip_dst.s_addr = sc->g_dst.s_addr;
-
- /*
- * If the packet comes from our host, we only change
- * the destination address in the IP header.
- * Else we also need to save and change the source
- */
- if (in_hosteq(ip->ip_src, sc->g_src)) {
- msiz = MOB_H_SIZ_S;
- } else {
- mob_h.proto |= MOB_H_SBIT;
- mob_h.osrc = ip->ip_src.s_addr;
- ip->ip_src.s_addr = sc->g_src.s_addr;
- msiz = MOB_H_SIZ_L;
- }
- mob_h.proto = htons(mob_h.proto);
- mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
-
- if ((m->m_data - msiz) < m->m_pktdat) {
- /* need new mbuf */
- MGETHDR(m0, M_DONTWAIT, MT_DATA);
- if (m0 == NULL) {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = ENOBUFS;
- goto end;
- }
- m0->m_next = m;
- m->m_data += sizeof(struct ip);
- m->m_len -= sizeof(struct ip);
- m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
- m0->m_len = msiz + sizeof(struct ip);
- m0->m_data += max_linkhdr;
- memcpy(mtod(m0, caddr_t), (caddr_t)ip,
- sizeof(struct ip));
- m = m0;
- } else { /* we have some space left in the old one */
- m->m_data -= msiz;
- m->m_len += msiz;
- m->m_pkthdr.len += msiz;
- bcopy(ip, mtod(m, caddr_t),
- sizeof(struct ip));
- }
- ip = mtod(m, struct ip *);
- memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
- ip->ip_len = ntohs(ip->ip_len) + msiz;
- } else { /* AF_INET */
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
- } else if (sc->g_proto == IPPROTO_GRE) {
- switch (dst->sa_family) {
- case AF_INET:
- ip = mtod(m, struct ip *);
- gre_ip_tos = ip->ip_tos;
- gre_ip_id = ip->ip_id;
- if (sc->wccp_ver == WCCP_V2) {
- extra = sizeof(uint32_t);
- etype = WCCP_PROTOCOL_TYPE;
- } else {
- etype = ETHERTYPE_IP;
- }
- break;
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFMTU:
+ GRE_WLOCK(sc);
+ sc->gre_mtu = ifr->ifr_mtu;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+ goto end;
+ case SIOCSIFPHYADDR:
#ifdef INET6
- case AF_INET6:
- gre_ip_id = ip_newid();
- etype = ETHERTYPE_IPV6;
+ case SIOCSIFPHYADDR_IN6:
+#endif
+ error = EINVAL;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
break;
#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- etype = ETHERTYPE_ATALK;
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ src = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
default:
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
error = EAFNOSUPPORT;
goto end;
}
-
- /* Reserve space for GRE header + optional GRE key */
- int hdrlen = sizeof(struct greip) + extra;
- if (sc->key)
- hdrlen += sizeof(uint32_t);
- M_PREPEND(m, hdrlen, M_DONTWAIT);
- } else {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
+ /* sa_family must be equal */
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto end;
- if (m == NULL) { /* mbuf allocation failed */
- _IF_DROP(&ifp->if_snd);
- error = ENOBUFS;
- goto end;
- }
-
- M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
-
- gh = mtod(m, struct greip *);
- if (sc->g_proto == IPPROTO_GRE) {
- uint32_t *options = gh->gi_options;
-
- memset((void *)gh, 0, sizeof(struct greip) + extra);
- gh->gi_ptype = htons(etype);
- gh->gi_flags = 0;
-
- /* Add key option */
- if (sc->key)
- {
- gh->gi_flags |= htons(GRE_KP);
- *(options++) = htonl(sc->key);
- }
- }
-
- gh->gi_pr = sc->g_proto;
- if (sc->g_proto != IPPROTO_MOBILE) {
- gh->gi_src = sc->g_src;
- gh->gi_dst = sc->g_dst;
- ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
- ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
- ((struct ip*)gh)->ip_ttl = GRE_TTL;
- ((struct ip*)gh)->ip_tos = gre_ip_tos;
- ((struct ip*)gh)->ip_id = gre_ip_id;
- gh->gi_len = m->m_pkthdr.len;
- }
-
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
- /*
- * Send it off and with IP_FORWARD flag to prevent it from
- * overwriting the ip_id again. ip_id is already set to the
- * ip_id of the encapsulated packet.
- */
- error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
- (struct ip_moptions *)NULL, (struct inpcb *)NULL);
- end:
- if (error)
- ifp->if_oerrors++;
- return (error);
-}
-
-static int
-gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct if_laddrreq *lifr = (struct if_laddrreq *)data;
- struct in_aliasreq *aifr = (struct in_aliasreq *)data;
- struct gre_softc *sc = ifp->if_softc;
- int s;
- struct sockaddr_in si;
- struct sockaddr *sa = NULL;
- int error, adj;
- struct sockaddr_in sp, sm, dp, dm;
- uint32_t key;
-
- error = 0;
- adj = 0;
-
- s = splnet();
- switch (cmd) {
- case SIOCSIFADDR:
- ifp->if_flags |= IFF_UP;
- break;
- case SIOCSIFDSTADDR:
- break;
- case SIOCSIFFLAGS:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
- break;
- if ((ifr->ifr_flags & IFF_LINK0) != 0)
- sc->g_proto = IPPROTO_GRE;
- else
- sc->g_proto = IPPROTO_MOBILE;
- if ((ifr->ifr_flags & IFF_LINK2) != 0)
- sc->wccp_ver = WCCP_V2;
- else
- sc->wccp_ver = WCCP_V1;
- goto recompute;
- case SIOCSIFMTU:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
- break;
- if (ifr->ifr_mtu < 576) {
- error = EINVAL;
- break;
- }
- ifp->if_mtu = ifr->ifr_mtu;
- break;
- case SIOCGIFMTU:
- ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
- break;
- case SIOCADDMULTI:
- /*
- * XXXRW: Isn't this priv_checkr() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
- }
- switch (ifr->ifr_addr.sa_family) {
+ /* validate sa_len */
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (src->sa_len != sizeof(struct sockaddr_in))
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
+ if (src->sa_len != sizeof(struct sockaddr_in6))
+ goto end;
break;
#endif
default:
error = EAFNOSUPPORT;
- break;
+ goto end;
}
- break;
- case SIOCDELMULTI:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
+ /* check sa_family looks sane for the cmd */
+ error = EAFNOSUPPORT;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ if (src->sa_family == AF_INET)
+ break;
+ goto end;
+#endif
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ if (src->sa_family == AF_INET6)
+ break;
+ goto end;
+#endif
}
- switch (ifr->ifr_addr.sa_family) {
+ error = EADDRNOTAVAIL;
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY)
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
- break;
+ if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+ ||
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto end;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto end;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto end;
#endif
- default:
- error = EAFNOSUPPORT;
+ };
+ error = gre_set_tunnel(ifp, src, dst);
+ break;
+ case SIOCDIFPHYADDR:
+ gre_delete_tunnel(ifp);
+ break;
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+#endif
+ if (sc->gre_family == 0) {
+ error = EADDRNOTAVAIL;
break;
}
- break;
- case GRESPROTO:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ GRE_RLOCK(sc);
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gre_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
- sc->g_proto = ifr->ifr_flags;
- switch (sc->g_proto) {
- case IPPROTO_GRE:
- ifp->if_flags |= IFF_LINK0;
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gre_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
+ &(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
- case IPPROTO_MOBILE:
- ifp->if_flags &= ~IFF_LINK0;
- break;
- default:
- error = EPROTONOSUPPORT;
- break;
+#endif
}
- goto recompute;
- case GREGPROTO:
- ifr->ifr_flags = sc->g_proto;
- break;
- case GRESADDRS:
- case GRESADDRD:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- return (error);
- /*
- * set tunnel endpoints, compute a less specific route
- * to the remote end and mark if as up
- */
- sa = &ifr->ifr_addr;
- if (cmd == GRESADDRS)
- sc->g_src = (satosin(sa))->sin_addr;
- if (cmd == GRESADDRD)
- sc->g_dst = (satosin(sa))->sin_addr;
- recompute:
+ if (error == 0) {
+ switch (cmd) {
#ifdef INET
- if (sc->encap != NULL) {
- encap_detach(sc->encap);
- sc->encap = NULL;
- }
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gre_oip.ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gre_oip.ip_dst;
+ break;
#endif
- if ((sc->g_src.s_addr != INADDR_ANY) &&
- (sc->g_dst.s_addr != INADDR_ANY)) {
- bzero(&sp, sizeof(sp));
- bzero(&sm, sizeof(sm));
- bzero(&dp, sizeof(dp));
- bzero(&dm, sizeof(dm));
- sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
- sizeof(struct sockaddr_in);
- sp.sin_family = sm.sin_family = dp.sin_family =
- dm.sin_family = AF_INET;
- sp.sin_addr = sc->g_src;
- dp.sin_addr = sc->g_dst;
- sm.sin_addr.s_addr = dm.sin_addr.s_addr =
- INADDR_BROADCAST;
-#ifdef INET
- sc->encap = encap_attach(AF_INET, sc->g_proto,
- sintosa(&sp), sintosa(&sm), sintosa(&dp),
- sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
- &in_gre_protosw : &in_mobile_protosw, sc);
- if (sc->encap == NULL)
- printf("%s: unable to attach encap\n",
- if_name(GRE2IFP(sc)));
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_dst;
+ break;
#endif
- if (sc->route.ro_rt != 0) /* free old route */
- RTFREE(sc->route.ro_rt);
- if (gre_compute_route(sc) == 0)
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ }
}
- break;
- case GREGADDRS:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
+ GRE_RUNLOCK(sc);
if (error != 0)
break;
- ifr->ifr_addr = *sa;
- break;
- case GREGADDRD:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
break;
- ifr->ifr_addr = *sa;
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
+ if (error != 0)
+ memset(sin6, 0, sizeof(*sin6));
+#endif
+ }
break;
- case SIOCSIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
+ case SIOCGTUNFIB:
+ ifr->ifr_fib = sc->gre_fibnum;
+ break;
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- if (aifr->ifra_addr.sin_family != AF_INET ||
- aifr->ifra_dstaddr.sin_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (aifr->ifra_addr.sin_len != sizeof(si) ||
- aifr->ifra_dstaddr.sin_len != sizeof(si)) {
+ if (ifr->ifr_fib >= rt_numfibs)
error = EINVAL;
+ else
+ sc->gre_fibnum = ifr->ifr_fib;
+ break;
+ case GRESKEY:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- }
- sc->g_src = aifr->ifra_addr.sin_addr;
- sc->g_dst = aifr->ifra_dstaddr.sin_addr;
- goto recompute;
- case SIOCSLIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
break;
- if (lifr->addr.ss_family != AF_INET ||
- lifr->dstaddr.ss_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
+ if (sc->gre_key != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_key = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
}
- if (lifr->addr.ss_len != sizeof(si) ||
- lifr->dstaddr.ss_len != sizeof(si)) {
- error = EINVAL;
- break;
- }
- sc->g_src = (satosin(&lifr->addr))->sin_addr;
- sc->g_dst =
- (satosin(&lifr->dstaddr))->sin_addr;
- goto recompute;
- case SIOCDIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- sc->g_src.s_addr = INADDR_ANY;
- sc->g_dst.s_addr = INADDR_ANY;
- goto recompute;
- case SIOCGLIFPHYADDR:
- if (sc->g_src.s_addr == INADDR_ANY ||
- sc->g_dst.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
- break;
- }
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
- break;
- memcpy(&lifr->addr, &si, sizeof(si));
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
- break;
- memcpy(&lifr->dstaddr, &si, sizeof(si));
break;
- case SIOCGIFPSRCADDR:
-#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
-#endif
- if (sc->g_src.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
- break;
- }
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
- break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+ case GREGKEY:
+ error = copyout(&sc->gre_key, ifr->ifr_data,
+ sizeof(sc->gre_key));
break;
- case SIOCGIFPDSTADDR:
-#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
-#endif
- if (sc->g_dst.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
+ case GRESOPTS:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- }
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
- break;
- case GRESKEY:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- break;
- error = copyin(ifr->ifr_data, &key, sizeof(key));
- if (error)
- break;
- /* adjust MTU for option header */
- if (key == 0 && sc->key != 0) /* clear */
- adj += sizeof(key);
- else if (key != 0 && sc->key == 0) /* set */
- adj -= sizeof(key);
-
- if (ifp->if_mtu + adj < 576) {
+ if (opt & ~GRE_OPTMASK)
error = EINVAL;
- break;
+ else {
+ if (sc->gre_options != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_options = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+ }
}
- ifp->if_mtu += adj;
- sc->key = key;
break;
- case GREGKEY:
- error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
+
+ case GREGOPTS:
+ error = copyout(&sc->gre_options, ifr->ifr_data,
+ sizeof(sc->gre_options));
break;
-
default:
error = EINVAL;
break;
}
-
- splx(s);
+end:
+ sx_xunlock(&gre_ioctl_sx);
return (error);
}
-/*
- * computes a route to our destination that is not the one
- * which would be taken by ip_output(), as this one will loop back to
- * us. If the interface is p2p as a--->b, then a routing entry exists
- * If we now send a packet to b (e.g. ping b), this will come down here
- * gets src=a, dst=b tacked on and would from ip_output() sent back to
- * if_gre.
- * Goal here is to compute a route to b that is less specific than
- * a-->b. We know that this one exists as in normal operation we have
- * at least a default route which matches.
- */
+static void
+gre_updatehdr(struct gre_softc *sc)
+{
+ struct grehdr *gh = NULL;
+ uint32_t *opts;
+ uint16_t flags;
+
+ GRE_WLOCK_ASSERT(sc);
+ switch (sc->gre_family) {
+#ifdef INET
+ case AF_INET:
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_v = IPPROTO_IPV4;
+ sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gh = &sc->gre_gihdr->gi_gre;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sc->gre_hlen = sizeof(struct greip6);
+ sc->gre_oip6.ip6_vfc = IPV6_VERSION;
+ sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
+ gh = &sc->gre_gi6hdr->gi6_gre;
+ break;
+#endif
+ default:
+ return;
+ }
+ flags = 0;
+ opts = gh->gre_opts;
+ if (sc->gre_options & GRE_ENABLE_CSUM) {
+ flags |= GRE_FLAGS_CP;
+ sc->gre_hlen += 2 * sizeof(uint16_t);
+ *opts++ = 0;
+ }
+ if (sc->gre_key != 0) {
+ flags |= GRE_FLAGS_KP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = htonl(sc->gre_key);
+ }
+ if (sc->gre_options & GRE_ENABLE_SEQ) {
+ flags |= GRE_FLAGS_SP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = 0;
+ } else
+ sc->gre_oseq = 0;
+ gh->gre_flags = htons(flags);
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen;
+}
+
+static void
+gre_detach(struct gre_softc *sc)
+{
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ if (sc->gre_ecookie != NULL)
+ encap_detach(sc->gre_ecookie);
+ sc->gre_ecookie = NULL;
+}
+
static int
-gre_compute_route(struct gre_softc *sc)
+gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
+ struct sockaddr *dst)
{
- struct route *ro;
+ struct gre_softc *sc, *tsc;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+#ifdef INET
+ struct ip *ip;
+#endif
+ void *hdr;
+ int error;
- ro = &sc->route;
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ GRE_LIST_LOCK();
+ sc = ifp->if_softc;
+ LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
+ if (tsc == sc || tsc->gre_family != src->sa_family)
+ continue;
+#ifdef INET
+ if (tsc->gre_family == AF_INET &&
+ tsc->gre_oip.ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gre_oip.ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
+#ifdef INET6
+ if (tsc->gre_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
+ }
+ GRE_LIST_UNLOCK();
- memset(ro, 0, sizeof(struct route));
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
- ro->ro_dst.sa_family = AF_INET;
- ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+ error = 0;
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ hdr = ip = malloc(sizeof(struct greip) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip->ip_src = satosin(src)->sin_addr;
+ ip->ip_dst = satosin(dst)->sin_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ hdr = ip6 = malloc(sizeof(struct greip6) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ break;
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+ if (sc->gre_family != 0)
+ gre_detach(sc);
+ GRE_WLOCK(sc);
+ if (sc->gre_family != 0)
+ free(sc->gre_hdr, M_GRE);
+ sc->gre_family = src->sa_family;
+ sc->gre_hdr = hdr;
+ sc->gre_oseq = 0;
+ sc->gre_iseq = UINT32_MAX;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
- /*
- * toggle last bit, so our interface is not found, but a less
- * specific route. I'd rather like to specify a shorter mask,
- * but this is not possible. Should work though. XXX
- * XXX MRT Use a different FIB for the tunnel to solve this problem.
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
- htonl(0x01);
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_attach(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_attach(sc);
+ break;
+#endif
}
+ if (error == 0) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ }
+ return (error);
+}
-#ifdef DIAGNOSTIC
- printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
- inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
+static void
+gre_delete_tunnel(struct ifnet *ifp)
+{
+ struct gre_softc *sc = ifp->if_softc;
+ int family;
+
+ GRE_WLOCK(sc);
+ family = sc->gre_family;
+ sc->gre_family = 0;
+ GRE_WUNLOCK(sc);
+ if (family != 0) {
+ gre_detach(sc);
+ free(sc->gre_hdr, M_GRE);
+ }
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+}
+
+int
+gre_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ uint32_t *opts;
+#ifdef notyet
+ uint32_t key;
#endif
+ uint16_t flags;
+ int hlen, isr, af;
- rtalloc_fib(ro, sc->gre_fibnum);
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
- /*
- * check if this returned a route at all and this route is no
- * recursion to ourself
- */
- if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
-#ifdef DIAGNOSTIC
- if (ro->ro_rt == NULL)
- printf(" - no route found!\n");
- else
- printf(" - route loops back to ourself!\n");
+ ifp = GRE2IFP(sc);
+ hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
+ if (m->m_pkthdr.len < hlen)
+ goto drop;
+ if (m->m_len < hlen) {
+ m = m_pullup(m, hlen);
+ if (m == NULL)
+ goto drop;
+ }
+ gh = (struct grehdr *)mtodo(m, *offp);
+ flags = ntohs(gh->gre_flags);
+ if (flags & ~GRE_FLAGS_MASK)
+ goto drop;
+ opts = gh->gre_opts;
+ hlen = 2 * sizeof(uint16_t);
+ if (flags & GRE_FLAGS_CP) {
+ /* reserved1 field must be zero */
+ if (((uint16_t *)opts)[1] != 0)
+ goto drop;
+ if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+ goto drop;
+ hlen += 2 * sizeof(uint16_t);
+ opts++;
+ }
+ if (flags & GRE_FLAGS_KP) {
+#ifdef notyet
+ /*
+ * XXX: The current implementation uses the key only for outgoing
+ * packets. But we can check the key value here, or even in the
+ * encapcheck function.
+ */
+ key = ntohl(*opts);
#endif
- return EADDRNOTAVAIL;
+ hlen += sizeof(uint32_t);
+ opts++;
+ }
+#ifdef notyet
+ } else
+ key = 0;
+ if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
+ goto drop;
+#endif
+ if (flags & GRE_FLAGS_SP) {
+#ifdef notyet
+ seq = ntohl(*opts);
+#endif
+ hlen += sizeof(uint32_t);
}
+ switch (ntohs(gh->gre_proto)) {
+ case ETHERTYPE_WCCP:
+ /*
+ * For WCCP skip an additional 4 bytes if after GRE header
+ * doesn't follow an IP header.
+ */
+ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
+ hlen += sizeof(uint32_t);
+ /* FALLTHROUGH */
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ af = AF_INET;
+ break;
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ af = AF_INET6;
+ break;
+ default:
+ goto drop;
+ }
+ m_adj(m, *offp + hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ M_SETFIB(m, ifp->if_fib);
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(isr, m);
+ return (IPPROTO_DONE);
+drop:
+ ifp->if_ierrors++;
+ m_freem(m);
+ return (IPPROTO_DONE);
+}
- /*
- * now change it back - else ip_output will just drop
- * the route and search one to this interface ...
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+#define MTAG_GRE 1307983903
+static int
+gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
-#ifdef DIAGNOSTIC
- printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
- inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
- printf("\n");
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_gre_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ ifp->if_xname, count);
+ return (EIO);
+ }
+ mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
+
+static int
+gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+ int error;
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
#endif
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto drop;
+ }
- return 0;
+ error = gre_check_nesting(ifp, m);
+ if (error != 0)
+ goto drop;
+
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ m->m_pkthdr.csum_data = af; /* save af for if_transmit */
+ return (ifp->if_transmit(ifp, m));
+drop:
+ m_freem(m);
+ ifp->if_oerrors++;
+ return (error);
}
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+static void
+gre_setseqn(struct grehdr *gh, uint32_t seq)
{
- u_int32_t sum = 0;
- int nwords = len >> 1;
+ uint32_t *opts;
+ uint16_t flags;
- while (nwords-- != 0)
- sum += *p++;
+ opts = gh->gre_opts;
+ flags = ntohs(gh->gre_flags);
+ KASSERT((flags & GRE_FLAGS_SP) != 0,
+ ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
+ if (flags & GRE_FLAGS_CP)
+ opts++;
+ if (flags & GRE_FLAGS_KP)
+ opts++;
+ *opts = htonl(seq);
+}
- if (len & 1) {
- union {
- u_short w;
- u_char c[2];
- } u;
- u.c[0] = *(u_char *)p;
- u.c[1] = 0;
- sum += u.w;
+static int
+gre_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ GRE_RLOCK_TRACKER;
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ uint32_t iaf, oaf, oseq;
+ int error, hlen, olen, plen;
+ int want_seq, want_csum;
+
+ plen = 0;
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
}
+ GRE_RLOCK(sc);
+ if (sc->gre_family == 0) {
+ GRE_RUNLOCK(sc);
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ iaf = m->m_pkthdr.csum_data;
+ oaf = sc->gre_family;
+ hlen = sc->gre_hlen;
+ want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
+ if (want_seq)
+ oseq = sc->gre_oseq++;
+ else
+ oseq = 0; /* Make compiler happy. */
+ want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
+ M_SETFIB(m, sc->gre_fibnum);
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ GRE_RUNLOCK(sc);
+ error = ENOBUFS;
+ goto drop;
+ }
+ bcopy(sc->gre_hdr, mtod(m, void *), hlen);
+ GRE_RUNLOCK(sc);
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ olen = sizeof(struct ip);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ olen = sizeof(struct ip6_hdr);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
+ }
+ gh = (struct grehdr *)mtodo(m, olen);
+ switch (iaf) {
+#ifdef INET
+ case AF_INET:
+ gh->gre_proto = htons(ETHERTYPE_IP);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ gh->gre_proto = htons(ETHERTYPE_IPV6);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
+ }
+ if (want_seq)
+ gre_setseqn(gh, oseq);
+ if (want_csum) {
+ *(uint16_t *)gh->gre_opts = in_cksum_skip(m,
+ m->m_pkthdr.len, olen);
+ }
+ plen = m->m_pkthdr.len - hlen;
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_output(m, iaf, hlen);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_output(m, iaf, hlen);
+ break;
+#endif
+ default:
+ m_freem(m);
+ error = ENETDOWN;
+ };
+drop:
+ if (error)
+ ifp->if_oerrors++;
+ else {
+ ifp->if_opackets++;
+ ifp->if_obytes += plen;
+ }
+ return (error);
+}
- /* end-around-carry */
- sum = (sum >> 16) + (sum & 0xffff);
- sum += (sum >> 16);
- return (~sum);
+static void
+gre_qflush(struct ifnet *ifp __unused)
+{
+
}
static int
@@ -959,16 +983,12 @@
switch (type) {
case MOD_LOAD:
- greattach();
- break;
case MOD_UNLOAD:
- if_clone_detach(&gre_cloner);
- mtx_destroy(&gre_mtx);
break;
default:
- return EOPNOTSUPP;
+ return (EOPNOTSUPP);
}
- return 0;
+ return (0);
}
static moduledata_t gre_mod = {
Modified: trunk/sys/net/if_gre.h
===================================================================
--- trunk/sys/net/if_gre.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_gre.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,9 +1,7 @@
/* $MidnightBSD$ */
-/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
-/* $FreeBSD: stable/9/sys/net/if_gre.h 223223 2011-06-18 09:34:03Z bz $ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
* All rights reserved
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -29,143 +27,97 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $
+ * $FreeBSD: stable/10/sys/net/if_gre.h 284072 2015-06-06 13:26:13Z ae $
*/
-#ifndef _NET_IF_GRE_H
-#define _NET_IF_GRE_H
+#ifndef _NET_IF_GRE_H_
+#define _NET_IF_GRE_H_
-#include <sys/ioccom.h>
#ifdef _KERNEL
-#include <sys/queue.h>
-
-/*
- * Version of the WCCP, need to be configured manually since
- * header for version 2 is the same but IP payload is prepended
- * with additional 4-bytes field.
- */
-typedef enum {
- WCCP_V1 = 0,
- WCCP_V2
-} wccp_ver_t;
-
-struct gre_softc {
- struct ifnet *sc_ifp;
- LIST_ENTRY(gre_softc) sc_list;
- int gre_unit;
- int gre_flags;
- u_int gre_fibnum; /* use this fib for envelopes */
- struct in_addr g_src; /* source address of gre packets */
- struct in_addr g_dst; /* destination address of gre packets */
- struct route route; /* routing entry that determines, where a
- encapsulated packet should go */
- u_char g_proto; /* protocol of encapsulator */
-
- const struct encaptab *encap; /* encapsulation cookie */
-
- uint32_t key; /* key included in outgoing GRE packets */
- /* zero means none */
-
- wccp_ver_t wccp_ver; /* version of the WCCP */
-};
-#define GRE2IFP(sc) ((sc)->sc_ifp)
-
-
-struct gre_h {
- u_int16_t flags; /* GRE flags */
- u_int16_t ptype; /* protocol type of payload typically
- Ether protocol type*/
- uint32_t options[0]; /* optional options */
-/*
- * from here on: fields are optional, presence indicated by flags
- *
- u_int_16 checksum checksum (one-complements of GRE header
- and payload
- Present if (ck_pres | rt_pres == 1).
- Valid if (ck_pres == 1).
- u_int_16 offset offset from start of routing filed to
- first octet of active SRE (see below).
- Present if (ck_pres | rt_pres == 1).
- Valid if (rt_pres == 1).
- u_int_32 key inserted by encapsulator e.g. for
- authentication
- Present if (key_pres ==1 ).
- u_int_32 seq_num Sequence number to allow for packet order
- Present if (seq_pres ==1 ).
- struct gre_sre[] routing Routing fileds (see below)
- Present if (rt_pres == 1)
- */
+/* GRE header according to RFC 2784 and RFC 2890 */
+struct grehdr {
+ uint16_t gre_flags; /* GRE flags */
+#define GRE_FLAGS_CP 0x8000 /* checksum present */
+#define GRE_FLAGS_KP 0x2000 /* key present */
+#define GRE_FLAGS_SP 0x1000 /* sequence present */
+#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP)
+ uint16_t gre_proto; /* protocol type */
+ uint32_t gre_opts[0]; /* optional fields */
} __packed;
+#ifdef INET
struct greip {
- struct ip gi_i;
- struct gre_h gi_g;
+ struct ip gi_ip;
+ struct grehdr gi_gre;
} __packed;
+#endif
-#define gi_pr gi_i.ip_p
-#define gi_len gi_i.ip_len
-#define gi_src gi_i.ip_src
-#define gi_dst gi_i.ip_dst
-#define gi_ptype gi_g.ptype
-#define gi_flags gi_g.flags
-#define gi_options gi_g.options
+#ifdef INET6
+struct greip6 {
+ struct ip6_hdr gi6_ip6;
+ struct grehdr gi6_gre;
+} __packed;
+#endif
-#define GRE_CP 0x8000 /* Checksum Present */
-#define GRE_RP 0x4000 /* Routing Present */
-#define GRE_KP 0x2000 /* Key Present */
-#define GRE_SP 0x1000 /* Sequence Present */
-#define GRE_SS 0x0800 /* Strict Source Route */
+struct gre_softc {
+ struct ifnet *gre_ifp;
+ LIST_ENTRY(gre_softc) gre_list;
+ struct rmlock gre_lock;
+ int gre_family; /* AF of delivery header */
+ uint32_t gre_iseq;
+ uint32_t gre_oseq;
+ uint32_t gre_key;
+ uint32_t gre_options;
+ uint32_t gre_mtu;
+ u_int gre_fibnum;
+ u_int gre_hlen; /* header size */
+ union {
+ void *hdr;
+#ifdef INET
+ struct greip *gihdr;
+#endif
+#ifdef INET6
+ struct greip6 *gi6hdr;
+#endif
+ } gre_uhdr;
+ const struct encaptab *gre_ecookie;
+};
+#define GRE2IFP(sc) ((sc)->gre_ifp)
+#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc")
+#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock)
+#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker
+#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED)
+#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock)
+#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock)
+#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED)
+#define gre_hdr gre_uhdr.hdr
+#define gre_gihdr gre_uhdr.gihdr
+#define gre_gi6hdr gre_uhdr.gi6hdr
+#define gre_oip gre_gihdr->gi_ip
+#define gre_oip6 gre_gi6hdr->gi6_ip6
+
+int gre_input(struct mbuf **, int *, int);
+#ifdef INET
+int in_gre_attach(struct gre_softc *);
+int in_gre_output(struct mbuf *, int, int);
+#endif
+#ifdef INET6
+int in6_gre_attach(struct gre_softc *);
+int in6_gre_output(struct mbuf *, int, int);
+#endif
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
* into GRE.
*/
-#define WCCP_PROTOCOL_TYPE 0x883E
-
-/*
- * gre_sre defines a Source route Entry. These are needed if packets
- * should be routed over more than one tunnel hop by hop
- */
-struct gre_sre {
- u_int16_t sre_family; /* address family */
- u_char sre_offset; /* offset to first octet of active entry */
- u_char sre_length; /* number of octets in the SRE.
- sre_lengthl==0 -> last entry. */
- u_char *sre_rtinfo; /* the routing information */
-};
-
-struct greioctl {
- int unit;
- struct in_addr addr;
-};
-
-/* for mobile encaps */
-
-struct mobile_h {
- u_int16_t proto; /* protocol and S-bit */
- u_int16_t hcrc; /* header checksum */
- u_int32_t odst; /* original destination address */
- u_int32_t osrc; /* original source addr, if S-bit set */
-} __packed;
-
-struct mobip_h {
- struct ip mi;
- struct mobile_h mh;
-} __packed;
-
-
-#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t))
-#define MOB_H_SIZ_L (sizeof(struct mobile_h))
-#define MOB_H_SBIT 0x0080
-
-#define GRE_TTL 30
-
+#define ETHERTYPE_WCCP 0x883E
#endif /* _KERNEL */
-/*
- * ioctls needed to manipulate the interface
- */
-
#define GRESADDRS _IOW('i', 101, struct ifreq)
#define GRESADDRD _IOW('i', 102, struct ifreq)
#define GREGADDRS _IOWR('i', 103, struct ifreq)
@@ -172,15 +124,14 @@
#define GREGADDRD _IOWR('i', 104, struct ifreq)
#define GRESPROTO _IOW('i' , 105, struct ifreq)
#define GREGPROTO _IOWR('i', 106, struct ifreq)
-#define GREGKEY _IOWR('i', 107, struct ifreq)
-#define GRESKEY _IOW('i', 108, struct ifreq)
-#ifdef _KERNEL
-LIST_HEAD(gre_softc_head, gre_softc);
-extern struct mtx gre_mtx;
-extern struct gre_softc_head gre_softc_list;
+#define GREGKEY _IOWR('i', 107, struct ifreq)
+#define GRESKEY _IOW('i', 108, struct ifreq)
+#define GREGOPTS _IOWR('i', 109, struct ifreq)
+#define GRESOPTS _IOW('i', 110, struct ifreq)
-u_int16_t gre_in_cksum(u_int16_t *, u_int);
-#endif /* _KERNEL */
+#define GRE_ENABLE_CSUM 0x0001
+#define GRE_ENABLE_SEQ 0x0002
+#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
-#endif
+#endif /* _NET_IF_GRE_H_ */
Modified: trunk/sys/net/if_iso88025subr.c
===================================================================
--- trunk/sys/net/if_iso88025subr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_iso88025subr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -31,7 +31,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_iso88025subr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_iso88025subr.c 332160 2018-04-07 00:04:28Z brooks $
*
*/
@@ -201,13 +201,9 @@
}
break;
- case SIOCGIFADDR: {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *) & ifr->ifr_data;
- bcopy(IF_LLADDR(ifp),
- (caddr_t) sa->sa_data, ISO88025_ADDR_LEN);
- }
+ case SIOCGIFADDR:
+ bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+ ISO88025_ADDR_LEN);
break;
case SIOCSIFMTU:
@@ -232,11 +228,8 @@
* ISO88025 encapsulation
*/
int
-iso88025_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
u_int16_t snap_type = 0;
int loop_copy = 0, error = 0, rif_len = 0;
@@ -339,7 +332,7 @@
bcopy((caddr_t)&(satoipx_addr(dst).x_host), (caddr_t)edst,
ISO88025_ADDR_LEN);
- M_PREPEND(m, 3, M_WAIT);
+ M_PREPEND(m, 3, M_WAITOK);
m = m_pullup(m, 3);
if (m == 0)
senderr(ENOBUFS);
@@ -352,7 +345,7 @@
#endif /* IPX */
case AF_UNSPEC:
{
- struct iso88025_sockaddr_data *sd;
+ const struct iso88025_sockaddr_data *sd;
/*
* For AF_UNSPEC sockaddr.sa_data must contain all of the
* mac information needed to send the packet. This allows
@@ -362,13 +355,12 @@
* should be an iso88025_sockaddr_data structure see iso88025.h
*/
loop_copy = -1;
- sd = (struct iso88025_sockaddr_data *)dst->sa_data;
+ sd = (const struct iso88025_sockaddr_data *)dst->sa_data;
gen_th.ac = sd->ac;
gen_th.fc = sd->fc;
- (void)memcpy((caddr_t)edst, (caddr_t)sd->ether_dhost,
- ISO88025_ADDR_LEN);
- (void)memcpy((caddr_t)gen_th.iso88025_shost,
- (caddr_t)sd->ether_shost, ISO88025_ADDR_LEN);
+ (void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN);
+ (void)memcpy(gen_th.iso88025_shost, sd->ether_shost,
+ ISO88025_ADDR_LEN);
rif_len = 0;
break;
}
@@ -383,7 +375,7 @@
*/
if (snap_type != 0) {
struct llc *l;
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
if (m == 0)
senderr(ENOBUFS);
l = mtod(m, struct llc *);
@@ -399,7 +391,7 @@
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_DONTWAIT);
+ M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT);
if (m == 0)
senderr(ENOBUFS);
th = mtod(m, struct iso88025_header *);
@@ -481,7 +473,6 @@
goto dropanyway;
}
th = mtod(m, struct iso88025_header *);
- m->m_pkthdr.header = (void *)th;
/*
* Discard packet if interface is not up.
Modified: trunk/sys/net/if_lagg.c
===================================================================
--- trunk/sys/net/if_lagg.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_lagg.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -19,7 +19,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_lagg.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_lagg.c 313112 2017-02-02 23:04:01Z asomers $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -38,7 +38,7 @@
#include <sys/proc.h>
#include <sys/hash.h>
#include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
@@ -52,18 +52,21 @@
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
+#include <net/vnet.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
+#include <netinet/ip.h>
#endif
#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
-#include <netinet/ip.h>
#endif
#ifdef INET6
#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/in6_ifattach.h>
#endif
#include <net/if_vlan_var.h>
@@ -80,12 +83,23 @@
{0, NULL}
};
-SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
-static struct mtx lagg_list_mtx;
+VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
+#define V_lagg_list VNET(lagg_list)
+static VNET_DEFINE(struct mtx, lagg_list_mtx);
+#define V_lagg_list_mtx VNET(lagg_list_mtx)
+#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \
+ "if_lagg list", NULL, MTX_DEF)
+#define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx)
+#define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx)
+#define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx)
eventhandler_tag lagg_detach_cookie = NULL;
static int lagg_clone_create(struct if_clone *, int, caddr_t);
static void lagg_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, lagg_cloner);
+#define V_lagg_cloner VNET(lagg_cloner)
+static const char laggname[] = "lagg";
+
static void lagg_lladdr(struct lagg_softc *, uint8_t *);
static void lagg_capabilities(struct lagg_softc *);
static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
@@ -97,7 +111,7 @@
static void lagg_port_state(struct ifnet *, int);
static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int lagg_port_output(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+ const struct sockaddr *, struct route *);
static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
#ifdef LAGG_PORT_STACKING
static int lagg_port_checkstacking(struct lagg_softc *);
@@ -119,10 +133,8 @@
struct lagg_port *);
static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
-IFC_SIMPLE_DECLARE(lagg, 0);
-
/* Simple round robin */
-static int lagg_rr_attach(struct lagg_softc *);
+static void lagg_rr_attach(struct lagg_softc *);
static int lagg_rr_detach(struct lagg_softc *);
static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
@@ -129,7 +141,7 @@
struct mbuf *);
/* Active failover */
-static int lagg_fail_attach(struct lagg_softc *);
+static void lagg_fail_attach(struct lagg_softc *);
static int lagg_fail_detach(struct lagg_softc *);
static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
@@ -136,7 +148,7 @@
struct mbuf *);
/* Loadbalancing */
-static int lagg_lb_attach(struct lagg_softc *);
+static void lagg_lb_attach(struct lagg_softc *);
static int lagg_lb_detach(struct lagg_softc *);
static int lagg_lb_port_create(struct lagg_port *);
static void lagg_lb_port_destroy(struct lagg_port *);
@@ -146,7 +158,7 @@
static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
/* 802.3ad LACP */
-static int lagg_lacp_attach(struct lagg_softc *);
+static void lagg_lacp_attach(struct lagg_softc *);
static int lagg_lacp_detach(struct lagg_softc *);
static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
@@ -153,10 +165,12 @@
struct mbuf *);
static void lagg_lacp_lladdr(struct lagg_softc *);
+static void lagg_callout(void *);
+
/* lagg protocol table */
-static const struct {
- int ti_proto;
- int (*ti_attach)(struct lagg_softc *);
+static const struct lagg_proto {
+ lagg_proto ti_proto;
+ void (*ti_attach)(struct lagg_softc *);
} lagg_protos[] = {
{ LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
{ LAGG_PROTO_FAILOVER, lagg_fail_attach },
@@ -167,19 +181,52 @@
};
SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
+SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
"Link Aggregation");
-static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
-SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
- &lagg_failover_rx_all, 0,
+/* Allow input on any failover links */
+static VNET_DEFINE(int, lagg_failover_rx_all);
+#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(lagg_failover_rx_all), 0,
"Accept input from any interface in a failover lagg");
-static int def_use_flowid = 1; /* Default value for using M_FLOWID */
-TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
-SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
- &def_use_flowid, 0,
+
+/* Default value for using M_FLOWID */
+static VNET_DEFINE(int, def_use_flowid) = 1;
+#define V_def_use_flowid VNET(def_use_flowid)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
+ &VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
+/* Default value for using M_FLOWID */
+static VNET_DEFINE(int, def_flowid_shift) = 16;
+#define V_def_flowid_shift VNET(def_flowid_shift)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
+ &VNET_NAME(def_flowid_shift), 0,
+ "Default setting for flowid shift for load sharing");
+
+static void
+vnet_lagg_init(const void *unused __unused)
+{
+
+ LAGG_LIST_LOCK_INIT();
+ SLIST_INIT(&V_lagg_list);
+ V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
+ lagg_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_lagg_init, NULL);
+
+static void
+vnet_lagg_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_lagg_cloner);
+ LAGG_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_lagg_uninit, NULL);
+
static int
lagg_modevent(module_t mod, int type, void *data)
{
@@ -186,9 +233,6 @@
switch (type) {
case MOD_LOAD:
- mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
- SLIST_INIT(&lagg_list);
- if_clone_attach(&lagg_cloner);
lagg_input_p = lagg_input;
lagg_linkstate_p = lagg_port_state;
lagg_detach_cookie = EVENTHANDLER_REGISTER(
@@ -198,10 +242,8 @@
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
lagg_detach_cookie);
- if_clone_detach(&lagg_cloner);
lagg_input_p = NULL;
lagg_linkstate_p = NULL;
- mtx_destroy(&lagg_list_mtx);
break;
default:
return (EOPNOTSUPP);
@@ -218,7 +260,6 @@
DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_lagg, 1);
-#if __FreeBSD_version >= 800000
/*
* This routine is run via an vlan
* config EVENT
@@ -228,16 +269,17 @@
{
struct lagg_softc *sc = ifp->if_softc;
struct lagg_port *lp;
+ struct rm_priotracker tracker;
if (ifp->if_softc != arg) /* Not our event */
return;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if (!SLIST_EMPTY(&sc->sc_ports)) {
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
}
/*
@@ -249,18 +291,18 @@
{
struct lagg_softc *sc = ifp->if_softc;
struct lagg_port *lp;
+ struct rm_priotracker tracker;
if (ifp->if_softc != arg) /* Not our event */
return;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if (!SLIST_EMPTY(&sc->sc_ports)) {
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
}
-#endif
static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
@@ -267,10 +309,8 @@
{
struct lagg_softc *sc;
struct ifnet *ifp;
- int i, error = 0;
static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
- struct sysctl_oid *oid;
- char num[14]; /* sufficient for 32 bits */
+ int i;
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -279,17 +319,15 @@
return (ENOSPC);
}
- sysctl_ctx_init(&sc->ctx);
- snprintf(num, sizeof(num), "%u", unit);
- sc->use_flowid = def_use_flowid;
- oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
- OID_AUTO, num, CTLFLAG_RD, NULL, "");
- SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
- "Use flow id for load sharing");
- SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count,
- "Total number of ports");
+ sc->sc_ipackets = counter_u64_alloc(M_WAITOK);
+ sc->sc_opackets = counter_u64_alloc(M_WAITOK);
+ sc->sc_ibytes = counter_u64_alloc(M_WAITOK);
+ sc->sc_obytes = counter_u64_alloc(M_WAITOK);
+
+ if (V_def_use_flowid)
+ sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+ sc->flowid_shift = V_def_flowid_shift;
+
/* Hash all layers by default */
sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
@@ -297,18 +335,21 @@
for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
sc->sc_proto = lagg_protos[i].ti_proto;
- if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
- if_free_type(ifp, IFT_ETHER);
- free(sc, M_DEVBUF);
- return (error);
- }
+ lagg_protos[i].ti_attach(sc);
break;
}
}
LAGG_LOCK_INIT(sc);
+ LAGG_CALLOUT_LOCK_INIT(sc);
SLIST_INIT(&sc->sc_ports);
TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
+ /*
+ * This uses the callout lock rather than the rmlock; one can't
+ * hold said rmlock during SWI.
+ */
+ callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
+
/* Initialise pseudo media types */
ifmedia_init(&sc->sc_media, 0, lagg_media_change,
lagg_media_status);
@@ -315,8 +356,7 @@
ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
- if_initname(ifp, ifc->ifc_name, unit);
- ifp->if_type = IFT_ETHER;
+ if_initname(ifp, laggname, unit);
ifp->if_softc = sc;
ifp->if_transmit = lagg_transmit;
ifp->if_qflush = lagg_qflush;
@@ -323,25 +363,26 @@
ifp->if_init = lagg_init;
ifp->if_ioctl = lagg_ioctl;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
/*
- * Attach as an ordinary ethernet device, childs will be attached
+ * Attach as an ordinary ethernet device, children will be attached
* as special device IFT_IEEE8023ADLAG.
*/
ether_ifattach(ifp, eaddr);
-#if __FreeBSD_version >= 800000
sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
-#endif
/* Insert into the global list of laggs */
- mtx_lock(&lagg_list_mtx);
- SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_LOCK();
+ SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
+ LAGG_LIST_UNLOCK();
+ callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
+
return (0);
}
@@ -356,10 +397,8 @@
lagg_stop(sc);
ifp->if_flags &= ~IFF_UP;
-#if __FreeBSD_version >= 800000
EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
-#endif
/* Shutdown and remove lagg ports */
while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
@@ -367,20 +406,29 @@
/* Unhook the aggregation protocol */
if (sc->sc_detach != NULL)
(*sc->sc_detach)(sc);
+ else
+ LAGG_WUNLOCK(sc);
- LAGG_WUNLOCK(sc);
-
- sysctl_ctx_free(&sc->ctx);
ifmedia_removeall(&sc->sc_media);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
- mtx_lock(&lagg_list_mtx);
- SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
- mtx_unlock(&lagg_list_mtx);
+ /* This grabs sc_callout_mtx, serialising it correctly */
+ callout_drain(&sc->sc_callout);
+ /* At this point it's drained; we can free this */
+ counter_u64_free(sc->sc_ipackets);
+ counter_u64_free(sc->sc_opackets);
+ counter_u64_free(sc->sc_ibytes);
+ counter_u64_free(sc->sc_obytes);
+
+ LAGG_LIST_LOCK();
+ SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
+ LAGG_LIST_UNLOCK();
+
taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
LAGG_LOCK_DESTROY(sc);
+ LAGG_CALLOUT_LOCK_DESTROY(sc);
free(sc, M_DEVBUF);
}
@@ -388,15 +436,28 @@
lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
{
struct ifnet *ifp = sc->sc_ifp;
+ struct lagg_port lp;
if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
return;
+ LAGG_WLOCK_ASSERT(sc);
+ /*
+ * Set the link layer address on the lagg interface.
+ * sc_lladdr() notifies the MAC change to
+ * the aggregation protocol. iflladdr_event handler which
+ * may trigger gratuitous ARPs for INET will be handled in
+ * a taskqueue.
+ */
bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
- /* Let the protocol know the MAC has changed */
if (sc->sc_lladdr != NULL)
(*sc->sc_lladdr)(sc);
- EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+
+ bzero(&lp, sizeof(lp));
+ lp.lp_ifp = sc->sc_ifp;
+ lp.lp_softc = sc;
+
+ lagg_port_lladdr(&lp, lladdr);
}
static void
@@ -405,14 +466,18 @@
struct lagg_port *lp;
int cap = ~0, ena = ~0;
u_long hwa = ~0UL;
+ struct ifnet_hw_tsomax hw_tsomax;
LAGG_WLOCK_ASSERT(sc);
+ memset(&hw_tsomax, 0, sizeof(hw_tsomax));
+
/* Get capabilities from the lagg ports */
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
cap &= lp->lp_ifp->if_capabilities;
ena &= lp->lp_ifp->if_capenable;
hwa &= lp->lp_ifp->if_hwassist;
+ if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
}
cap = (cap == ~0 ? 0 : cap);
ena = (ena == ~0 ? 0 : ena);
@@ -420,7 +485,8 @@
if (sc->sc_ifp->if_capabilities != cap ||
sc->sc_ifp->if_capenable != ena ||
- sc->sc_ifp->if_hwassist != hwa) {
+ sc->sc_ifp->if_hwassist != hwa ||
+ if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
sc->sc_ifp->if_capabilities = cap;
sc->sc_ifp->if_capenable = ena;
sc->sc_ifp->if_hwassist = hwa;
@@ -439,11 +505,13 @@
struct ifnet *ifp = lp->lp_ifp;
struct lagg_llq *llq;
int pending = 0;
+ int primary;
LAGG_WLOCK_ASSERT(sc);
- if (lp->lp_detaching ||
- memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+ primary = (sc->sc_primary->lp_ifp == ifp) ? 1 : 0;
+ if (primary == 0 && (lp->lp_detaching ||
+ memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0))
return;
/* Check to make sure its not already queued to be changed */
@@ -462,6 +530,7 @@
/* Update the lladdr even if pending, it may have changed */
llq->llq_ifp = ifp;
+ llq->llq_primary = primary;
bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
if (!pending)
@@ -494,12 +563,20 @@
for (llq = head; llq != NULL; llq = head) {
ifp = llq->llq_ifp;
- /* Set the link layer address */
- error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
- if (error)
- printf("%s: setlladdr failed on %s\n", __func__,
- ifp->if_xname);
-
+ CURVNET_SET(ifp->if_vnet);
+ if (llq->llq_primary == 0) {
+ /*
+ * Set the link layer address on the laggport interface.
+ * if_setlladdr() triggers gratuitous ARPs for INET.
+ */
+ error = if_setlladdr(ifp, llq->llq_lladdr,
+ ETHER_ADDR_LEN);
+ if (error)
+ printf("%s: setlladdr failed on %s\n", __func__,
+ ifp->if_xname);
+ } else
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ CURVNET_RESTORE();
head = SLIST_NEXT(llq, llq_entries);
free(llq, M_DEVBUF);
}
@@ -509,7 +586,7 @@
lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
{
struct lagg_softc *sc_ptr;
- struct lagg_port *lp;
+ struct lagg_port *lp, *tlp;
int error = 0;
LAGG_WLOCK_ASSERT(sc);
@@ -545,10 +622,10 @@
return (ENOMEM);
/* Check if port is a stacked lagg */
- mtx_lock(&lagg_list_mtx);
- SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
+ LAGG_LIST_LOCK();
+ SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
if (ifp == sc_ptr->sc_ifp) {
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
free(lp, M_DEVBUF);
return (EINVAL);
/* XXX disable stacking for the moment, its untested */
@@ -556,7 +633,7 @@
lp->lp_flags |= LAGG_PORT_STACK;
if (lagg_port_checkstacking(sc_ptr) >=
LAGG_MAX_STACKING) {
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
free(lp, M_DEVBUF);
return (E2BIG);
}
@@ -563,7 +640,7 @@
#endif
}
}
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
/* Change the interface type */
lp->lp_iftype = ifp->if_type;
@@ -588,8 +665,23 @@
lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
}
- /* Insert into the list of ports */
- SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
+ /*
+ * Insert into the list of ports.
+ * Keep ports sorted by if_index. It is handy, when configuration
+ * is predictable and `ifconfig laggN create ...` command
+ * will lead to the same result each time.
+ */
+ SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
+ if (tlp->lp_ifp->if_index < ifp->if_index && (
+ SLIST_NEXT(tlp, lp_entries) == NULL ||
+ SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index >
+ ifp->if_index))
+ break;
+ }
+ if (tlp != NULL)
+ SLIST_INSERT_AFTER(tlp, lp, lp_entries);
+ else
+ SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
sc->sc_count++;
/* Update lagg capabilities */
@@ -714,6 +806,7 @@
struct lagg_softc *sc;
struct lagg_port *lp = NULL;
int error = 0;
+ struct rm_priotracker tracker;
/* Should be checked by the caller */
if (ifp->if_type != IFT_IEEE8023ADLAG ||
@@ -728,15 +821,15 @@
break;
}
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
error = ENOENT;
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
}
lagg_port2req(lp, rp);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSIFCAP:
@@ -766,7 +859,7 @@
return (error);
fallback:
- if (lp->lp_ioctl != NULL)
+ if (lp != NULL && lp->lp_ioctl != NULL)
return ((*lp->lp_ioctl)(ifp, cmd, data));
return (EINVAL);
@@ -777,7 +870,7 @@
*/
static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+ const struct sockaddr *dst, struct route *ro)
{
struct lagg_port *lp = ifp->if_lagg;
@@ -789,7 +882,7 @@
/* drop any other frames */
m_freem(m);
- return (EBUSY);
+ return (ENETDOWN);
}
static void
@@ -897,29 +990,32 @@
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_reqall *ra = (struct lagg_reqall *)data;
+ struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
struct ifreq *ifr = (struct ifreq *)data;
struct lagg_port *lp;
+ const struct lagg_proto *proto = NULL;
struct ifnet *tpif;
struct thread *td = curthread;
char *buf, *outbuf;
int count, buflen, len, error = 0;
+ struct rm_priotracker tracker;
bzero(&rpbuf, sizeof(rpbuf));
switch (cmd) {
case SIOCGLAGG:
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
count = 0;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
count++;
buflen = count * sizeof(struct lagg_reqport);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
ra->ra_proto = sc->sc_proto;
if (sc->sc_req != NULL)
(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
@@ -937,7 +1033,7 @@
buf += sizeof(rpbuf);
len -= sizeof(rpbuf);
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
ra->ra_ports = count;
ra->ra_size = count * sizeof(rpbuf);
error = copyout(outbuf, ra->ra_port, ra->ra_size);
@@ -947,15 +1043,27 @@
error = priv_check(td, PRIV_NET_LAGG);
if (error)
break;
- if (ra->ra_proto >= LAGG_PROTO_MAX) {
+ for (proto = lagg_protos; proto->ti_proto != LAGG_PROTO_NONE;
+ proto++) {
+ if (proto->ti_proto == ra->ra_proto) {
+ if (sc->sc_ifflags & IFF_DEBUG)
+ printf("%s: using proto %u\n",
+ sc->sc_ifname, proto->ti_proto);
+ break;
+ }
+ }
+ if (proto->ti_proto >= LAGG_PROTO_MAX) {
error = EPROTONOSUPPORT;
break;
}
+ /* Set to LAGG_PROTO_NONE during the attach. */
LAGG_WLOCK(sc);
if (sc->sc_proto != LAGG_PROTO_NONE) {
- /* Reset protocol first in case detach unlocks */
+ int (*sc_detach)(struct lagg_softc *sc);
+
+ /* Reset protocol and pointers */
sc->sc_proto = LAGG_PROTO_NONE;
- error = sc->sc_detach(sc);
+ sc_detach = sc->sc_detach;
sc->sc_detach = NULL;
sc->sc_start = NULL;
sc->sc_input = NULL;
@@ -967,30 +1075,138 @@
sc->sc_lladdr = NULL;
sc->sc_req = NULL;
sc->sc_portreq = NULL;
- } else if (sc->sc_input != NULL) {
- /* Still detaching */
- error = EBUSY;
+
+ if (sc_detach != NULL)
+ sc_detach(sc);
+ else
+ LAGG_WUNLOCK(sc);
+ } else
+ LAGG_WUNLOCK(sc);
+ if (proto->ti_proto != LAGG_PROTO_NONE)
+ proto->ti_attach(sc);
+ LAGG_WLOCK(sc);
+ sc->sc_proto = proto->ti_proto;
+ LAGG_WUNLOCK(sc);
+ break;
+ case SIOCGLAGGOPTS:
+ ro->ro_opts = sc->sc_opts;
+ if (sc->sc_proto == LAGG_PROTO_LACP) {
+ struct lacp_softc *lsc;
+
+ lsc = (struct lacp_softc *)sc->sc_psc;
+ if (lsc->lsc_debug.lsc_tx_test != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
+ if (lsc->lsc_debug.lsc_rx_test != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
+ if (lsc->lsc_strict_mode != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_STRICT;
+ if (lsc->lsc_fast_timeout != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT;
+
+ ro->ro_active = sc->sc_active;
+ } else {
+ ro->ro_active = 0;
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ ro->ro_active += LAGG_PORTACTIVE(lp);
}
- if (error != 0) {
- LAGG_WUNLOCK(sc);
+ ro->ro_flapping = sc->sc_flapping;
+ ro->ro_flowid_shift = sc->flowid_shift;
+ break;
+ case SIOCSLAGGOPTS:
+ error = priv_check(td, PRIV_NET_LAGG);
+ if (error)
break;
+ if (ro->ro_opts == 0)
+ break;
+ /*
+ * Set options. LACP options are stored in sc->sc_psc,
+ * not in sc_opts.
+ */
+ int valid, lacp;
+
+ switch (ro->ro_opts) {
+ case LAGG_OPT_USE_FLOWID:
+ case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_FLOWIDSHIFT:
+ valid = 1;
+ lacp = 0;
+ break;
+ case LAGG_OPT_LACP_TXTEST:
+ case -LAGG_OPT_LACP_TXTEST:
+ case LAGG_OPT_LACP_RXTEST:
+ case -LAGG_OPT_LACP_RXTEST:
+ case LAGG_OPT_LACP_STRICT:
+ case -LAGG_OPT_LACP_STRICT:
+ case LAGG_OPT_LACP_TIMEOUT:
+ case -LAGG_OPT_LACP_TIMEOUT:
+ valid = lacp = 1;
+ break;
+ default:
+ valid = lacp = 0;
+ break;
}
- for (int i = 0; i < (sizeof(lagg_protos) /
- sizeof(lagg_protos[0])); i++) {
- if (lagg_protos[i].ti_proto == ra->ra_proto) {
- if (sc->sc_ifflags & IFF_DEBUG)
- printf("%s: using proto %u\n",
- sc->sc_ifname,
- lagg_protos[i].ti_proto);
- sc->sc_proto = lagg_protos[i].ti_proto;
- if (sc->sc_proto != LAGG_PROTO_NONE)
- error = lagg_protos[i].ti_attach(sc);
- LAGG_WUNLOCK(sc);
- return (error);
+
+ LAGG_WLOCK(sc);
+ if (valid == 0 ||
+ (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
+ /* Invalid combination of options specified. */
+ error = EINVAL;
+ LAGG_WUNLOCK(sc);
+ break; /* Return from SIOCSLAGGOPTS. */
+ }
+ /*
+ * Store new options into sc->sc_opts except for
+ * FLOWIDSHIFT and LACP options.
+ */
+ if (lacp == 0) {
+ if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
+ sc->flowid_shift = ro->ro_flowid_shift;
+ else if (ro->ro_opts > 0)
+ sc->sc_opts |= ro->ro_opts;
+ else
+ sc->sc_opts &= ~ro->ro_opts;
+ } else {
+ struct lacp_softc *lsc;
+ struct lacp_port *lp;
+
+ lsc = (struct lacp_softc *)sc->sc_psc;
+
+ switch (ro->ro_opts) {
+ case LAGG_OPT_LACP_TXTEST:
+ lsc->lsc_debug.lsc_tx_test = 1;
+ break;
+ case -LAGG_OPT_LACP_TXTEST:
+ lsc->lsc_debug.lsc_tx_test = 0;
+ break;
+ case LAGG_OPT_LACP_RXTEST:
+ lsc->lsc_debug.lsc_rx_test = 1;
+ break;
+ case -LAGG_OPT_LACP_RXTEST:
+ lsc->lsc_debug.lsc_rx_test = 0;
+ break;
+ case LAGG_OPT_LACP_STRICT:
+ lsc->lsc_strict_mode = 1;
+ break;
+ case -LAGG_OPT_LACP_STRICT:
+ lsc->lsc_strict_mode = 0;
+ break;
+ case LAGG_OPT_LACP_TIMEOUT:
+ LACP_LOCK(lsc);
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+ lp->lp_state |= LACP_STATE_TIMEOUT;
+ LACP_UNLOCK(lsc);
+ lsc->lsc_fast_timeout = 1;
+ break;
+ case -LAGG_OPT_LACP_TIMEOUT:
+ LACP_LOCK(lsc);
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+ lp->lp_state &= ~LACP_STATE_TIMEOUT;
+ LACP_UNLOCK(lsc);
+ lsc->lsc_fast_timeout = 0;
+ break;
}
}
LAGG_WUNLOCK(sc);
- error = EPROTONOSUPPORT;
break;
case SIOCGLAGGFLAGS:
rf->rf_flags = sc->sc_flags;
@@ -1015,16 +1231,16 @@
break;
}
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
lp->lp_softc != sc) {
error = ENOENT;
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
}
lagg_port2req(lp, rp);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSLAGGPORT:
error = priv_check(td, PRIV_NET_LAGG);
@@ -1035,6 +1251,26 @@
error = EINVAL;
break;
}
+#ifdef INET6
+ /*
+ * A laggport interface should not have inet6 address
+ * because two interfaces with a valid link-local
+ * scope zone must not be merged in any form. This
+ * restriction is needed to prevent violation of
+ * link-local scope zone. Attempts to add a laggport
+ * interface which has inet6 addresses triggers
+ * removal of all inet6 addresses on the member
+ * interface.
+ */
+ if (in6ifa_llaonifp(tpif)) {
+ in6_ifdetach(tpif);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ tpif->if_xname);
+ }
+#endif
LAGG_WLOCK(sc);
error = lagg_port_create(sc, tpif);
LAGG_WUNLOCK(sc);
@@ -1133,39 +1369,39 @@
struct ifnet *ifp = lp->lp_ifp;
struct ifnet *scifp = sc->sc_ifp;
struct lagg_mc *mc;
- struct ifmultiaddr *ifma, *rifma = NULL;
- struct sockaddr_dl sdl;
+ struct ifmultiaddr *ifma;
int error;
LAGG_WLOCK_ASSERT(sc);
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_type = IFT_ETHER;
- sdl.sdl_alen = ETHER_ADDR_LEN;
- sdl.sdl_index = ifp->if_index;
-
if (set) {
+ IF_ADDR_WLOCK(scifp);
TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- LLADDR(&sdl), ETHER_ADDR_LEN);
-
- error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
- if (error)
- return (error);
mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
- if (mc == NULL)
+ if (mc == NULL) {
+ IF_ADDR_WUNLOCK(scifp);
return (ENOMEM);
- mc->mc_ifma = rifma;
+ }
+ bcopy(ifma->ifma_addr, &mc->mc_addr,
+ ifma->ifma_addr->sa_len);
+ mc->mc_addr.sdl_index = ifp->if_index;
+ mc->mc_ifma = NULL;
SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
}
+ IF_ADDR_WUNLOCK(scifp);
+ SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
+ error = if_addmulti(ifp,
+ (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
+ if (error)
+ return (error);
+ }
} else {
while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
- if_delmulti_ifma(mc->mc_ifma);
+ if (mc->mc_ifma && !lp->lp_detaching)
+ if_delmulti_ifma(mc->mc_ifma);
free(mc, M_DEVBUF);
}
}
@@ -1230,14 +1466,15 @@
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error, len, mcast;
+ struct rm_priotracker tracker;
len = m->m_pkthdr.len;
mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
m_freem(m);
ifp->if_oerrors++;
return (ENXIO);
@@ -1246,12 +1483,12 @@
ETHER_BPF_MTAP(ifp, m);
error = (*sc->sc_start)(sc, m);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
if (error == 0) {
- ifp->if_opackets++;
+ counter_u64_add(sc->sc_opackets, 1);
+ counter_u64_add(sc->sc_obytes, len);
ifp->if_omcasts += mcast;
- ifp->if_obytes += len;
} else
ifp->if_oerrors++;
@@ -1272,12 +1509,13 @@
struct lagg_port *lp = ifp->if_lagg;
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *scifp = sc->sc_ifp;
+ struct rm_priotracker tracker;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(lp->lp_flags & LAGG_PORT_DISABLED) ||
sc->sc_proto == LAGG_PROTO_NONE) {
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
m_freem(m);
return (NULL);
}
@@ -1284,11 +1522,11 @@
ETHER_BPF_MTAP(scifp, m);
- m = (*sc->sc_input)(sc, lp, m);
+ m = (lp->lp_detaching == 0) ? (*sc->sc_input)(sc, lp, m) : NULL;
if (m != NULL) {
- scifp->if_ipackets++;
- scifp->if_ibytes += m->m_pkthdr.len;
+ counter_u64_add(sc->sc_ipackets, 1);
+ counter_u64_add(sc->sc_ibytes, m->m_pkthdr.len);
if (scifp->if_flags & IFF_MONITOR) {
m_freem(m);
@@ -1296,7 +1534,7 @@
}
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
return (m);
}
@@ -1317,16 +1555,17 @@
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_port *lp;
+ struct rm_priotracker tracker;
imr->ifm_status = IFM_AVALID;
imr->ifm_active = IFM_ETHER | IFM_AUTO;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp))
imr->ifm_status |= IFM_ACTIVE;
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
}
static void
@@ -1558,18 +1797,16 @@
/*
* Simple round robin aggregation
*/
-
-static int
+static void
lagg_rr_attach(struct lagg_softc *sc)
{
sc->sc_detach = lagg_rr_detach;
sc->sc_start = lagg_rr_start;
sc->sc_input = lagg_rr_input;
+ sc->sc_detach = NULL;
sc->sc_port_create = NULL;
sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
sc->sc_seq = 0;
-
- return (0);
}
static int
@@ -1596,7 +1833,7 @@
*/
if ((lp = lagg_link_active(sc, lp)) == NULL) {
m_freem(m);
- return (ENOENT);
+ return (ENETDOWN);
}
/* Send mbuf */
@@ -1617,8 +1854,7 @@
/*
* Active failover
*/
-
-static int
+static void
lagg_fail_attach(struct lagg_softc *sc)
{
sc->sc_detach = lagg_fail_detach;
@@ -1626,8 +1862,7 @@
sc->sc_input = lagg_fail_input;
sc->sc_port_create = NULL;
sc->sc_port_destroy = NULL;
-
- return (0);
+ sc->sc_detach = NULL;
}
static int
@@ -1644,7 +1879,7 @@
/* Use the master port if active or the next available port */
if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
m_freem(m);
- return (ENOENT);
+ return (ENETDOWN);
}
/* Send mbuf */
@@ -1657,7 +1892,7 @@
struct ifnet *ifp = sc->sc_ifp;
struct lagg_port *tmp_tp;
- if (lp == sc->sc_primary || lagg_failover_rx_all) {
+ if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
m->m_pkthdr.rcvif = ifp;
return (m);
}
@@ -1681,16 +1916,13 @@
/*
* Loadbalancing
*/
-
-static int
+static void
lagg_lb_attach(struct lagg_softc *sc)
{
struct lagg_port *lp;
struct lagg_lb *lb;
- if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
- M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
- return (ENOMEM);
+ lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
sc->sc_detach = lagg_lb_detach;
sc->sc_start = lagg_lb_start;
@@ -1704,8 +1936,6 @@
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lagg_lb_port_create(lp);
-
- return (0);
}
static int
@@ -1712,6 +1942,7 @@
lagg_lb_detach(struct lagg_softc *sc)
{
struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+ LAGG_WUNLOCK(sc);
if (lb != NULL)
free(lb, M_DEVBUF);
return (0);
@@ -1760,8 +1991,9 @@
struct lagg_port *lp = NULL;
uint32_t p = 0;
- if (sc->use_flowid && (m->m_flags & M_FLOWID))
- p = m->m_pkthdr.flowid;
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ p = m->m_pkthdr.flowid >> sc->flowid_shift;
else
p = lagg_hashmbuf(sc, m, lb->lb_key);
p %= sc->sc_count;
@@ -1773,7 +2005,7 @@
*/
if ((lp = lagg_link_active(sc, lp)) == NULL) {
m_freem(m);
- return (ENOENT);
+ return (ENETDOWN);
}
/* Send mbuf */
@@ -1794,12 +2026,10 @@
/*
* 802.3ad LACP
*/
-
-static int
+static void
lagg_lacp_attach(struct lagg_softc *sc)
{
struct lagg_port *lp;
- int error;
sc->sc_detach = lagg_lacp_detach;
sc->sc_port_create = lacp_port_create;
@@ -1813,14 +2043,10 @@
sc->sc_req = lacp_req;
sc->sc_portreq = lacp_portreq;
- error = lacp_attach(sc);
- if (error)
- return (error);
+ lacp_attach(sc);
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lacp_port_create(lp);
-
- return (error);
}
static int
@@ -1827,17 +2053,18 @@
lagg_lacp_detach(struct lagg_softc *sc)
{
struct lagg_port *lp;
- int error;
+ void *psc;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lacp_port_destroy(lp);
- /* unlocking is safe here */
+ psc = sc->sc_psc;
+ sc->sc_psc = NULL;
LAGG_WUNLOCK(sc);
- error = lacp_detach(sc);
- LAGG_WLOCK(sc);
- return (error);
+ lacp_detach(psc);
+
+ return (0);
}
static void
@@ -1862,7 +2089,7 @@
lp = lacp_select_tx_port(sc, m);
if (lp == NULL) {
m_freem(m);
- return (EBUSY);
+ return (ENETDOWN);
}
/* Send mbuf */
@@ -1898,3 +2125,17 @@
m->m_pkthdr.rcvif = ifp;
return (m);
}
+
+static void
+lagg_callout(void *arg)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)arg;
+ struct ifnet *ifp = sc->sc_ifp;
+
+ ifp->if_ipackets = counter_u64_fetch(sc->sc_ipackets);
+ ifp->if_opackets = counter_u64_fetch(sc->sc_opackets);
+ ifp->if_ibytes = counter_u64_fetch(sc->sc_ibytes);
+ ifp->if_obytes = counter_u64_fetch(sc->sc_obytes);
+
+ callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
+}
Modified: trunk/sys/net/if_lagg.h
===================================================================
--- trunk/sys/net/if_lagg.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_lagg.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -16,14 +16,12 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
- * $FreeBSD: stable/9/sys/net/if_lagg.h 236049 2012-05-26 07:39:52Z thompsa $
+ * $FreeBSD: stable/10/sys/net/if_lagg.h 287808 2015-09-15 05:19:10Z hiren $
*/
#ifndef _NET_LAGG_H
#define _NET_LAGG_H
-#include <sys/sysctl.h>
-
/*
* Global definitions
*/
@@ -50,17 +48,19 @@
"\05DISTRIBUTING\06DISABLED"
/* Supported lagg PROTOs */
-#define LAGG_PROTO_NONE 0 /* no lagg protocol defined */
-#define LAGG_PROTO_ROUNDROBIN 1 /* simple round robin */
-#define LAGG_PROTO_FAILOVER 2 /* active failover */
-#define LAGG_PROTO_LOADBALANCE 3 /* loadbalance */
-#define LAGG_PROTO_LACP 4 /* 802.3ad lacp */
-#define LAGG_PROTO_ETHERCHANNEL 5 /* Cisco FEC */
-#define LAGG_PROTO_MAX 6
+typedef enum {
+ LAGG_PROTO_NONE = 0, /* no lagg protocol defined */
+ LAGG_PROTO_ROUNDROBIN, /* simple round robin */
+ LAGG_PROTO_FAILOVER, /* active failover */
+ LAGG_PROTO_LOADBALANCE, /* loadbalance */
+ LAGG_PROTO_LACP, /* 802.3ad lacp */
+ LAGG_PROTO_ETHERCHANNEL,/* Cisco FEC */
+ LAGG_PROTO_MAX,
+} lagg_proto;
struct lagg_protos {
const char *lpr_name;
- int lpr_proto;
+ lagg_proto lpr_proto;
};
#define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER
@@ -137,7 +137,35 @@
#define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags)
#define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags)
+struct lagg_reqopts {
+ char ro_ifname[IFNAMSIZ]; /* name of the lagg */
+
+ int ro_opts; /* Option bitmap */
+#define LAGG_OPT_NONE 0x00
+#define LAGG_OPT_USE_FLOWID 0x01 /* use M_FLOWID */
+/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
+#define LAGG_OPT_FLOWIDSHIFT 0x02 /* Set flowid */
+#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */
+#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */
+#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */
+#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */
+#define LAGG_OPT_LACP_TIMEOUT 0x80 /* LACP timeout */
+ u_int ro_count; /* number of ports */
+ u_int ro_active; /* active port count */
+ u_int ro_flapping; /* number of flapping */
+ int ro_flowid_shift; /* shift the flowid */
+};
+
+#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts)
+#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts)
+
+#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \
+ "\006LACP_TXTEST\007LACP_RXTEST"
+
#ifdef _KERNEL
+
+#include <sys/counter.h>
+
/*
* Internal kernel part
*/
@@ -174,6 +202,7 @@
};
struct lagg_mc {
+ struct sockaddr_dl mc_addr;
struct ifmultiaddr *mc_ifma;
SLIST_ENTRY(lagg_mc) mc_entries;
};
@@ -182,14 +211,19 @@
struct lagg_llq {
struct ifnet *llq_ifp;
uint8_t llq_lladdr[ETHER_ADDR_LEN];
+ uint8_t llq_primary;
SLIST_ENTRY(lagg_llq) llq_entries;
};
struct lagg_softc {
struct ifnet *sc_ifp; /* virtual interface */
- struct rwlock sc_mtx;
+ struct rmlock sc_mtx;
+ struct mtx sc_call_mtx;
int sc_proto; /* lagg protocol */
u_int sc_count; /* number of ports */
+ u_int sc_active; /* active port count */
+ u_int sc_flapping; /* number of flapping
+ * events */
struct lagg_port *sc_primary; /* primary port */
struct ifmedia sc_media; /* media config */
caddr_t sc_psc; /* protocol data */
@@ -196,6 +230,11 @@
uint32_t sc_seq; /* sequence counter */
uint32_t sc_flags;
+ counter_u64_t sc_ipackets;
+ counter_u64_t sc_opackets;
+ counter_u64_t sc_ibytes;
+ counter_u64_t sc_obytes;
+
SLIST_HEAD(__tplhd, lagg_port) sc_ports; /* list of interfaces */
SLIST_ENTRY(lagg_softc) sc_entries;
@@ -216,12 +255,11 @@
void (*sc_lladdr)(struct lagg_softc *);
void (*sc_req)(struct lagg_softc *, caddr_t);
void (*sc_portreq)(struct lagg_port *, caddr_t);
-#if __FreeBSD_version >= 800000
eventhandler_tag vlan_attach;
eventhandler_tag vlan_detach;
-#endif
- struct sysctl_ctx_list ctx; /* sysctl variables */
- int use_flowid; /* use M_FLOWID */
+ struct callout sc_callout;
+ u_int sc_opts;
+ int flowid_shift; /* set flowid shift*/
};
struct lagg_port {
@@ -241,21 +279,26 @@
/* Redirected callbacks */
int (*lp_ioctl)(struct ifnet *, u_long, caddr_t);
- int (*lp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
+ int (*lp_output)(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
SLIST_ENTRY(lagg_port) lp_entries;
};
-#define LAGG_LOCK_INIT(_sc) rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
-#define LAGG_LOCK_DESTROY(_sc) rw_destroy(&(_sc)->sc_mtx)
-#define LAGG_RLOCK(_sc) rw_rlock(&(_sc)->sc_mtx)
-#define LAGG_WLOCK(_sc) rw_wlock(&(_sc)->sc_mtx)
-#define LAGG_RUNLOCK(_sc) rw_runlock(&(_sc)->sc_mtx)
-#define LAGG_WUNLOCK(_sc) rw_wunlock(&(_sc)->sc_mtx)
-#define LAGG_RLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
-#define LAGG_WLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define LAGG_LOCK_INIT(_sc) rm_init(&(_sc)->sc_mtx, "if_lagg rmlock")
+#define LAGG_LOCK_DESTROY(_sc) rm_destroy(&(_sc)->sc_mtx)
+#define LAGG_RLOCK(_sc, _p) rm_rlock(&(_sc)->sc_mtx, (_p))
+#define LAGG_WLOCK(_sc) rm_wlock(&(_sc)->sc_mtx)
+#define LAGG_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->sc_mtx, (_p))
+#define LAGG_WUNLOCK(_sc) rm_wunlock(&(_sc)->sc_mtx)
+#define LAGG_RLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define LAGG_WLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define LAGG_CALLOUT_LOCK_INIT(_sc) \
+ mtx_init(&(_sc)->sc_call_mtx, "if_lagg callout mutex", NULL,\
+ MTX_DEF)
+#define LAGG_CALLOUT_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_call_mtx)
+
extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
extern void (*lagg_linkstate_p)(struct ifnet *, int );
@@ -262,6 +305,8 @@
int lagg_enqueue(struct ifnet *, struct mbuf *);
uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
+SYSCTL_DECL(_net_link_lagg);
+
#endif /* _KERNEL */
#endif /* _NET_LAGG_H */
Modified: trunk/sys/net/if_llatbl.c
===================================================================
--- trunk/sys/net/if_llatbl.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_llatbl.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,7 +26,7 @@
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_llatbl.c 248852 2013-03-28 20:48:40Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_llatbl.c 294500 2016-01-21 14:04:02Z bz $");
#include "opt_ddb.h"
#include "opt_inet.h"
@@ -63,14 +63,10 @@
MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
-static VNET_DEFINE(SLIST_HEAD(, lltable), lltables);
+static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) =
+ SLIST_HEAD_INITIALIZER(lltables);
#define V_lltables VNET(lltables)
-extern void arprequest(struct ifnet *, struct in_addr *, struct in_addr *,
- u_char *);
-
-static void vnet_lltable_init(void);
-
struct rwlock lltable_rwlock;
RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock");
@@ -277,10 +273,9 @@
u_int laflags = 0, flags = 0;
int error = 0;
- if (dl == NULL || dl->sdl_family != AF_LINK) {
- log(LOG_INFO, "%s: invalid dl\n", __func__);
- return EINVAL;
- }
+ KASSERT(dl != NULL && dl->sdl_family == AF_LINK,
+ ("%s: invalid dl\n", __func__));
+
ifp = ifnet_byindex(dl->sdl_index);
if (ifp == NULL) {
log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
@@ -290,28 +285,8 @@
switch (rtm->rtm_type) {
case RTM_ADD:
- if (rtm->rtm_flags & RTF_ANNOUNCE) {
+ if (rtm->rtm_flags & RTF_ANNOUNCE)
flags |= LLE_PUB;
-#ifdef INET
- if (dst->sa_family == AF_INET &&
- ((struct sockaddr_inarp *)dst)->sin_other != 0) {
- struct rtentry *rt;
- ((struct sockaddr_inarp *)dst)->sin_other = 0;
- rt = rtalloc1(dst, 0, 0);
- if (rt == NULL || !(rt->rt_flags & RTF_HOST)) {
- log(LOG_INFO, "%s: RTM_ADD publish "
- "(proxy only) is invalid\n",
- __func__);
- if (rt)
- RTFREE_LOCKED(rt);
- return EINVAL;
- }
- RTFREE_LOCKED(rt);
-
- flags |= LLE_PROXY;
- }
-#endif
- }
flags |= LLE_CREATE;
break;
@@ -350,7 +325,7 @@
* LLE_DELETED flag, and reset the expiration timer
*/
bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
- lle->la_flags |= (flags & (LLE_PUB | LLE_PROXY));
+ lle->la_flags |= (flags & LLE_PUB);
lle->la_flags |= LLE_VALID;
lle->la_flags &= ~LLE_DELETED;
#ifdef INET6
@@ -372,15 +347,12 @@
laflags = lle->la_flags;
LLE_WUNLOCK(lle);
#ifdef INET
- /* gratuitous ARP */
- if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) {
+ /* gratuitous ARP */
+ if ((laflags & LLE_PUB) && dst->sa_family == AF_INET)
arprequest(ifp,
&((struct sockaddr_in *)dst)->sin_addr,
&((struct sockaddr_in *)dst)->sin_addr,
- ((laflags & LLE_PROXY) ?
- (u_char *)IF_LLADDR(ifp) :
- (u_char *)LLADDR(dl)));
- }
+ (u_char *)LLADDR(dl));
#endif
} else {
if (flags & LLE_EXCLUSIVE)
@@ -395,15 +367,6 @@
return (error);
}
-static void
-vnet_lltable_init()
-{
-
- SLIST_INIT(&V_lltables);
-}
-VNET_SYSINIT(vnet_lltable_init, SI_SUB_PSEUDO, SI_ORDER_FIRST,
- vnet_lltable_init, NULL);
-
#ifdef DDB
struct llentry_sa {
struct llentry base;
Modified: trunk/sys/net/if_llatbl.h
===================================================================
--- trunk/sys/net/if_llatbl.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_llatbl.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,13 +26,11 @@
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_llatbl.h 240313 2012-09-10 12:25:57Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_llatbl.h 254963 2013-08-27 16:45:00Z alfred $");
#ifndef _NET_IF_LLATBL_H_
#define _NET_IF_LLATBL_H_
-#include "opt_ofed.h"
-
#include <sys/_rwlock.h>
#include <netinet/in.h>
@@ -76,9 +74,7 @@
union {
uint64_t mac_aligned;
uint16_t mac16[3];
-#ifdef OFED
uint8_t mac8[20]; /* IB needs 20 bytes. */
-#endif
} ll_addr;
/* XXX af-private? */
@@ -173,7 +169,6 @@
#define LLE_STATIC 0x0002 /* entry is static */
#define LLE_IFADDR 0x0004 /* entry is interface addr */
#define LLE_VALID 0x0008 /* ll_addr is valid */
-#define LLE_PROXY 0x0010 /* proxy entry ??? */
#define LLE_PUB 0x0020 /* publish entry ??? */
#define LLE_LINKED 0x0040 /* linked to lookup structure */
#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
@@ -206,4 +201,14 @@
}
int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
+
+#include <sys/eventhandler.h>
+enum {
+ LLENTRY_RESOLVED,
+ LLENTRY_TIMEDOUT,
+ LLENTRY_DELETED,
+ LLENTRY_EXPIRED,
+};
+typedef void (*lle_event_fn)(void *, struct llentry *, int);
+EVENTHANDLER_DECLARE(lle_event, lle_event_fn);
#endif /* _NET_IF_LLATBL_H_ */
Modified: trunk/sys/net/if_llc.h
===================================================================
--- trunk/sys/net/if_llc.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_llc.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -30,7 +30,7 @@
* SUCH DAMAGE.
*
* @(#)if_llc.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/if_llc.h 164804 2006-12-01 17:50:11Z imp $
+ * $FreeBSD: stable/10/sys/net/if_llc.h 164804 2006-12-01 17:50:11Z imp $
*/
#ifndef _NET_IF_LLC_H_
Modified: trunk/sys/net/if_loop.c
===================================================================
--- trunk/sys/net/if_loop.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_loop.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if_loop.c 8.2 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/if_loop.c 238876 2012-07-28 23:11:09Z bz $
+ * $FreeBSD: stable/10/sys/net/if_loop.c 285605 2015-07-15 16:57:40Z pkelsey $
*/
/*
@@ -102,7 +102,7 @@
int loioctl(struct ifnet *, u_long, caddr_t);
static void lortrequest(int, struct rtentry *, struct rt_addrinfo *);
int looutput(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+ const struct sockaddr *dst, struct route *ro);
static int lo_clone_create(struct if_clone *, int, caddr_t);
static void lo_clone_destroy(struct ifnet *);
@@ -109,13 +109,12 @@
VNET_DEFINE(struct ifnet *, loif); /* Used externally */
#ifdef VIMAGE
-static VNET_DEFINE(struct ifc_simple_data, lo_cloner_data);
-static VNET_DEFINE(struct if_clone, lo_cloner);
-#define V_lo_cloner_data VNET(lo_cloner_data)
+static VNET_DEFINE(struct if_clone *, lo_cloner);
#define V_lo_cloner VNET(lo_cloner)
#endif
-IFC_SIMPLE_DECLARE(lo, 1);
+static struct if_clone *lo_cloner;
+static const char loname[] = "lo";
static void
lo_clone_destroy(struct ifnet *ifp)
@@ -140,7 +139,7 @@
if (ifp == NULL)
return (ENOSPC);
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, loname, unit);
ifp->if_mtu = LOMTU;
ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
ifp->if_ioctl = loioctl;
@@ -162,12 +161,12 @@
{
#ifdef VIMAGE
+ lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+ 1);
V_lo_cloner = lo_cloner;
- V_lo_cloner_data = lo_cloner_data;
- V_lo_cloner.ifc_data = &V_lo_cloner_data;
- if_clone_attach(&V_lo_cloner);
#else
- if_clone_attach(&lo_cloner);
+ lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+ 1);
#endif
}
VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
@@ -178,7 +177,7 @@
vnet_loif_uninit(const void *unused __unused)
{
- if_clone_detach(&V_lo_cloner);
+ if_clone_detach(V_lo_cloner);
V_loif = NULL;
}
VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
@@ -212,7 +211,7 @@
DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
int
-looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
u_int32_t af;
@@ -243,13 +242,13 @@
ifp->if_obytes += m->m_pkthdr.len;
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
#if 1 /* XXX */
- switch (dst->sa_family) {
+ switch (af) {
case AF_INET:
if (ifp->if_capenable & IFCAP_RXCSUM) {
m->m_pkthdr.csum_data = 0xffff;
@@ -278,12 +277,12 @@
case AF_APPLETALK:
break;
default:
- printf("looutput: af=%d unexpected\n", dst->sa_family);
+ printf("looutput: af=%d unexpected\n", af);
m_freem(m);
return (EAFNOSUPPORT);
}
#endif
- return (if_simloop(ifp, m, dst->sa_family, 0));
+ return (if_simloop(ifp, m, af, 0));
}
/*
@@ -396,7 +395,7 @@
{
RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+ rt->rt_mtu = rt->rt_ifp->if_mtu;
}
/*
Added: trunk/sys/net/if_me.c
===================================================================
--- trunk/sys/net/if_me.c (rev 0)
+++ trunk/sys/net/if_me.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,675 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_me.c 290347 2015-11-04 00:21:02Z hrs $");
+
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mbuf.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/vnet.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_encap.h>
+
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
+#define MEMTU 1500
+static const char mename[] = "me";
+static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
+static VNET_DEFINE(struct mtx, me_mtx);
+#define V_me_mtx VNET(me_mtx)
+/* Minimal forwarding header RFC 2004 */
+struct mobhdr {
+ uint8_t mob_proto; /* protocol */
+ uint8_t mob_flags; /* flags */
+#define MOB_FLAGS_SP 0x80 /* source present */
+ uint16_t mob_csum; /* header checksum */
+ struct in_addr mob_dst; /* original destination address */
+ struct in_addr mob_src; /* original source addr (optional) */
+} __packed;
+
+/* Per-interface private state for one me(4) tunnel. */
+struct me_softc {
+ struct ifnet *me_ifp; /* backing ifnet */
+ LIST_ENTRY(me_softc) me_list; /* entry on V_me_softc_list */
+ struct rmlock me_lock; /* read-mostly lock over me_src/me_dst */
+ u_int me_fibnum; /* FIB used for encapsulated output */
+ const struct encaptab *me_ecookie; /* ip_encap registration handle */
+ struct in_addr me_src; /* outer (tunnel) source address */
+ struct in_addr me_dst; /* outer (tunnel) destination address */
+};
+#define ME2IFP(sc) ((sc)->me_ifp)
+#define ME_READY(sc) ((sc)->me_src.s_addr != 0)
+#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc")
+#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock)
+#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker
+#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker)
+#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker)
+#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED)
+#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock)
+#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock)
+#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED)
+
+#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
+#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx)
+#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx)
+#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
+#define V_me_softc_list VNET(me_softc_list)
+static struct sx me_ioctl_sx;
+SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
+
+static int me_clone_create(struct if_clone *, int, caddr_t);
+static void me_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, me_cloner);
+#define V_me_cloner VNET(me_cloner)
+
+static void me_qflush(struct ifnet *);
+static int me_transmit(struct ifnet *, struct mbuf *);
+static int me_ioctl(struct ifnet *, u_long, caddr_t);
+static int me_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+static int me_input(struct mbuf **, int *, int);
+
+static int me_set_tunnel(struct ifnet *, struct sockaddr_in *,
+ struct sockaddr_in *);
+static void me_delete_tunnel(struct ifnet *);
+
+SYSCTL_DECL(_net_link);
+static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
+ "Minimal Encapsulation for IP (RFC 2004)");
+#ifndef MAX_ME_NEST
+#define MAX_ME_NEST 1
+#endif
+
+static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
+#define V_max_me_nesting VNET(max_me_nesting)
+SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
+
+extern struct domain inetdomain;
+static void me_input10(struct mbuf *, int);
+static const struct protosw in_mobile_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_MOBILE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = me_input10,
+ .pr_output = (pr_output_t *)rip_output,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+
+/*
+ * Per-VNET startup: initialize the softc list and its mutex, then
+ * register the "me" interface cloner.
+ */
+static void
+vnet_me_init(const void *unused __unused)
+{
+ LIST_INIT(&V_me_softc_list);
+ ME_LIST_LOCK_INIT();
+ V_me_cloner = if_clone_simple(mename, me_clone_create,
+ me_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_me_init, NULL);
+
+/* Per-VNET teardown: undo vnet_me_init() — detach the cloner and
+ * destroy the list mutex. */
+static void
+vnet_me_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_me_cloner);
+ ME_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_me_uninit, NULL);
+
+/*
+ * Cloner callback: allocate and attach a new me(4) interface.
+ * The default MTU reserves room for the full mobility header; tunnel
+ * endpoints are configured later via SIOCSIFPHYADDR.
+ * Always returns 0 (the M_WAITOK allocation cannot fail).
+ */
+static int
+me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct me_softc *sc;
+
+ sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
+ /* inherit the creating process's FIB for tunnel output */
+ sc->me_fibnum = curthread->td_proc->p_fibnum;
+ ME2IFP(sc) = if_alloc(IFT_TUNNEL);
+ ME_LOCK_INIT(sc);
+ ME2IFP(sc)->if_softc = sc;
+ if_initname(ME2IFP(sc), mename, unit);
+
+ ME2IFP(sc)->if_mtu = MEMTU - sizeof(struct mobhdr);
+ ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
+ ME2IFP(sc)->if_output = me_output;
+ ME2IFP(sc)->if_ioctl = me_ioctl;
+ ME2IFP(sc)->if_transmit = me_transmit;
+ ME2IFP(sc)->if_qflush = me_qflush;
+ ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+ ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
+ if_attach(ME2IFP(sc));
+ bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
+ ME_LIST_LOCK();
+ LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
+ ME_LIST_UNLOCK();
+ return (0);
+}
+
+/*
+ * Cloner callback: tear down an me(4) interface. me_ioctl_sx serializes
+ * destruction against me_ioctl(), which re-checks if_softc for NULL
+ * after taking the same lock.
+ */
+static void
+me_clone_destroy(struct ifnet *ifp)
+{
+ struct me_softc *sc;
+
+ sx_xlock(&me_ioctl_sx);
+ sc = ifp->if_softc;
+ me_delete_tunnel(ifp);
+ ME_LIST_LOCK();
+ LIST_REMOVE(sc, me_list);
+ ME_LIST_UNLOCK();
+ bpfdetach(ifp);
+ if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&me_ioctl_sx);
+
+ if_free(ifp);
+ ME_LOCK_DESTROY(sc);
+ free(sc, M_IFME);
+}
+
+/*
+ * Interface ioctl handler. MTU, flags and multicast requests are
+ * answered without locking; tunnel-address requests are serialized by
+ * me_ioctl_sx (which also protects against concurrent destroy — see
+ * the sc == NULL check below).
+ */
+static int
+me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ ME_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *src, *dst;
+ struct me_softc *sc;
+ int error;
+
+ switch (cmd) {
+ case SIOCSIFMTU:
+ /* refuse MTUs below 576, the historic IPv4 minimum */
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ ifp->if_mtu = ifr->ifr_mtu - sizeof(struct mobhdr);
+ return (0);
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ /* FALLTHROUGH */
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ }
+ sx_xlock(&me_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ /* interface is being destroyed */
+ error = ENXIO;
+ goto end;
+ }
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr_in *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr_in *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
+ /* both endpoints must be well-formed AF_INET addresses */
+ if (src->sin_family != dst->sin_family ||
+ src->sin_family != AF_INET ||
+ src->sin_len != dst->sin_len ||
+ src->sin_len != sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ break;
+ }
+ if (src->sin_addr.s_addr == INADDR_ANY ||
+ dst->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ error = me_set_tunnel(ifp, src, dst);
+ break;
+ case SIOCDIFPHYADDR:
+ me_delete_tunnel(ifp);
+ break;
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ ME_RLOCK(sc);
+ if (!ME_READY(sc)) {
+ error = EADDRNOTAVAIL;
+ ME_RUNLOCK(sc);
+ break;
+ }
+ src = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin_family = AF_INET;
+ src->sin_len = sizeof(*src);
+ switch (cmd) {
+ case SIOCGIFPSRCADDR:
+ src->sin_addr = sc->me_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ src->sin_addr = sc->me_dst;
+ break;
+ }
+ ME_RUNLOCK(sc);
+ /* don't leak addresses a jailed caller may not see */
+ error = prison_if(curthread->td_ucred, sintosa(src));
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ case SIOCGTUNFIB:
+ ifr->ifr_fib = sc->me_fibnum;
+ break;
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ break;
+ if (ifr->ifr_fib >= rt_numfibs)
+ error = EINVAL;
+ else
+ sc->me_fibnum = ifr->ifr_fib;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+end:
+ sx_xunlock(&me_ioctl_sx);
+ return (error);
+}
+
+/*
+ * ip_encap match callback for incoming IPPROTO_MOBILE packets.
+ * Returns a non-zero preference (32 bits matched on each of the two
+ * outer addresses) when the packet's outer src/dst mirror this tunnel's
+ * dst/src exactly, 0 to decline.
+ */
+static int
+me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+ ME_RLOCK_TRACKER;
+ struct me_softc *sc;
+ struct ip *ip;
+ int ret;
+
+ sc = (struct me_softc *)arg;
+ if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
+
+ M_ASSERTPKTHDR(m);
+
+ /* too short even for a mobility header without the source field */
+ if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) -
+ sizeof(struct in_addr))
+ return (0);
+
+ ret = 0;
+ ME_RLOCK(sc);
+ if (ME_READY(sc)) {
+ ip = mtod(m, struct ip *);
+ if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
+ sc->me_dst.s_addr == ip->ip_src.s_addr)
+ ret = 32 * 2;
+ }
+ ME_RUNLOCK(sc);
+ return (ret);
+}
+
+/*
+ * Configure the outer tunnel addresses (SIOCSIFPHYADDR backend).
+ * Fails with EADDRNOTAVAIL if another me(4) interface already uses the
+ * same src/dst pair. Registers the encap handler on first use, then
+ * marks the interface running and its link up.
+ */
+static int
+me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
+ struct sockaddr_in *dst)
+{
+ struct me_softc *sc, *tsc;
+
+ sx_assert(&me_ioctl_sx, SA_XLOCKED);
+ ME_LIST_LOCK();
+ sc = ifp->if_softc;
+ LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
+ if (tsc == sc || !ME_READY(tsc))
+ continue;
+ if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
+ tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
+ ME_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+ }
+ ME_LIST_UNLOCK();
+
+ ME_WLOCK(sc);
+ sc->me_dst = dst->sin_addr;
+ sc->me_src = src->sin_addr;
+ ME_WUNLOCK(sc);
+
+ /* register with ip_encap only once per interface lifetime */
+ if (sc->me_ecookie == NULL)
+ sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE,
+ me_encapcheck, &in_mobile_protosw, sc);
+ if (sc->me_ecookie != NULL) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ }
+ return (0);
+}
+
+/*
+ * Unconfigure the tunnel: drop the encap registration, clear both
+ * endpoint addresses and mark the interface down.
+ */
+static void
+me_delete_tunnel(struct ifnet *ifp)
+{
+ struct me_softc *sc = ifp->if_softc;
+
+ sx_assert(&me_ioctl_sx, SA_XLOCKED);
+ if (sc->me_ecookie != NULL)
+ encap_detach(sc->me_ecookie);
+ sc->me_ecookie = NULL;
+ ME_WLOCK(sc);
+ sc->me_src.s_addr = 0;
+ sc->me_dst.s_addr = 0;
+ ME_WUNLOCK(sc);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+}
+
+/*
+ * One's-complement (Internet) checksum over nwords 16-bit words.
+ * Verifying a buffer that contains a valid checksum yields 0.
+ */
+static uint16_t
+me_in_cksum(uint16_t *p, int nwords)
+{
+ uint32_t sum = 0;
+
+ while (nwords-- > 0)
+ sum += *p++;
+ /* fold the carries back into the low 16 bits */
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16);
+ return (~sum);
+}
+
+/*
+ * pr_input glue for the IPv4 protosw: read the protocol out of the
+ * (still present) outer IP header and hand off to me_input().
+ */
+static void
+me_input10(struct mbuf *m, int off)
+{
+ int proto;
+
+ proto = (mtod(m, struct ip *))->ip_p;
+ me_input(&m, &off, proto);
+}
+
+/*
+ * Decapsulate a received IPPROTO_MOBILE packet (RFC 2004): validate the
+ * minimal forwarding header, restore the original destination (and,
+ * when MOB_FLAGS_SP is set, source) address into the IP header, strip
+ * the mobility header and requeue the packet for IP input.
+ * Always returns IPPROTO_DONE; malformed packets are dropped and
+ * counted as input errors.
+ */
+int
+me_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct me_softc *sc;
+ struct mobhdr *mh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct ip *ip;
+ int hlen;
+
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+ ifp = ME2IFP(sc);
+ /* checks for short packets; a short one can only lack mob_src */
+ hlen = sizeof(struct mobhdr);
+ if (m->m_pkthdr.len < sizeof(struct ip) + hlen)
+ hlen -= sizeof(struct in_addr);
+ if (m->m_len < sizeof(struct ip) + hlen)
+ m = m_pullup(m, sizeof(struct ip) + hlen);
+ if (m == NULL)
+ goto drop;
+ mh = (struct mobhdr *)mtodo(m, sizeof(struct ip));
+ /* check for wrong flags: only MOB_FLAGS_SP is defined */
+ if (mh->mob_flags & (~MOB_FLAGS_SP)) {
+ m_freem(m);
+ goto drop;
+ }
+ if (mh->mob_flags) {
+ /* source-present header must have been received in full */
+ if (hlen != sizeof(struct mobhdr)) {
+ m_freem(m);
+ goto drop;
+ }
+ } else
+ hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+ /* check mobile header checksum */
+ if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) {
+ m_freem(m);
+ goto drop;
+ }
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ /* restore the original inner addresses and protocol */
+ ip = mtod(m, struct ip *);
+ ip->ip_dst = mh->mob_dst;
+ ip->ip_p = mh->mob_proto;
+ ip->ip_sum = 0;
+ ip->ip_len = htons(m->m_pkthdr.len - hlen);
+ if (mh->mob_flags)
+ ip->ip_src = mh->mob_src;
+ /* slide the IP header over the mobility header, then trim it off */
+ memmove(mtodo(m, hlen), ip, sizeof(struct ip));
+ m_adj(m, hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
+ M_SETFIB(m, ifp->if_fib);
+ /* hlen is reused here as the DLT_NULL address-family word for BPF */
+ hlen = AF_INET;
+ BPF_MTAP2(ifp, &hlen, sizeof(hlen), m);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(NETISR_IP, m);
+ return (IPPROTO_DONE);
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ return (IPPROTO_DONE);
+}
+
+#define MTAG_ME 1414491977 /* mbuf tag cookie ('T''E'-ish magic) — unique per driver */
+/*
+ * Guard against tunnel loops and excessive nesting: scan the mbuf's
+ * tags for this ifp (a repeat means a loop -> EIO), enforce the
+ * V_max_me_nesting depth limit, then tag the mbuf with our ifp so
+ * downstream me(4) interfaces can perform the same check.
+ */
+static int
+me_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
+
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_ME, 0, mtag)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_me_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ ifp->if_xname, count);
+ return (EIO);
+ }
+ mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
+
+/*
+ * if_output handler: check interface state and address family (IPv4
+ * only), run the nesting/loop check, tap BPF, then hand the packet to
+ * if_transmit. On any error the mbuf is freed and OERRORS is bumped.
+ */
+static int
+me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+ int error;
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
+#endif
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto drop;
+ }
+
+ error = me_check_nesting(ifp, m);
+ if (error != 0)
+ goto drop;
+
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ /* BPF writes pass AF_UNSPEC with the real AF stashed in sa_data */
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ if (af != AF_INET) {
+ error = EAFNOSUPPORT;
+ goto drop;
+ }
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ return (ifp->if_transmit(ifp, m));
+drop:
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
+}
+
+/*
+ * Encapsulate and send one IPv4 packet per RFC 2004 minimal
+ * encapsulation: build the mobility header (omitting the original
+ * source when it already equals the tunnel source), rewrite the outer
+ * addresses and pass the result to ip_output() on the tunnel's FIB.
+ */
+static int
+me_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ ME_RLOCK_TRACKER;
+ struct mobhdr mh;
+ struct me_softc *sc;
+ struct ip *ip;
+ int error, hlen, plen;
+
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ ip = mtod(m, struct ip *);
+ /* Fragmented datagrams shouldn't be encapsulated */
+ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ error = EINVAL;
+ m_freem(m);
+ goto drop;
+ }
+ mh.mob_proto = ip->ip_p;
+ mh.mob_src = ip->ip_src;
+ mh.mob_dst = ip->ip_dst;
+ ME_RLOCK(sc);
+ if (!ME_READY(sc)) {
+ ME_RUNLOCK(sc);
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ /* the source field is optional when it matches the tunnel source */
+ if (in_hosteq(sc->me_src, ip->ip_src)) {
+ hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+ mh.mob_flags = 0;
+ } else {
+ hlen = sizeof(struct mobhdr);
+ mh.mob_flags = MOB_FLAGS_SP;
+ }
+ plen = m->m_pkthdr.len;
+ ip->ip_src = sc->me_src;
+ ip->ip_dst = sc->me_dst;
+ M_SETFIB(m, sc->me_fibnum);
+ ME_RUNLOCK(sc);
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ if (m->m_len < sizeof(struct ip) + hlen)
+ m = m_pullup(m, sizeof(struct ip) + hlen);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ /* move the outer IP header to the front of the prepended space */
+ memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip));
+ ip = mtod(m, struct ip *);
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_p = IPPROTO_MOBILE;
+ ip->ip_sum = 0;
+ mh.mob_csum = 0;
+ mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t));
+ bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen);
+ error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+drop:
+ if (error)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ else {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ }
+ return (error);
+}
+
+/* if_qflush stub: me(4) transmits directly and keeps no queue. */
+static void
+me_qflush(struct ifnet *ifp __unused)
+{
+
+}
+
+/* Module event handler: nothing to do beyond accepting load/unload. */
+static int
+memodevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ case MOD_UNLOAD:
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+/* Module glue; actual setup happens in the per-VNET SYSINITs above. */
+static moduledata_t me_mod = {
+ "if_me",
+ memodevent,
+ 0
+};
+
+DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_me, 1);
Property changes on: trunk/sys/net/if_me.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/if_media.c
===================================================================
--- trunk/sys/net/if_media.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_media.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/* $NetBSD: if_media.c,v 1.1 1997/03/17 02:55:15 thorpej Exp $ */
-/* $FreeBSD: stable/9/sys/net/if_media.c 218909 2011-02-21 09:01:34Z brucec $ */
+/* $FreeBSD: stable/10/sys/net/if_media.c 313387 2017-02-07 15:12:27Z rstone $ */
/*-
* Copyright (c) 1997
@@ -69,6 +69,7 @@
int flags, int mask);
#ifdef IFMEDIA_DEBUG
+#include <net/if_var.h>
int ifmedia_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug,
0, "if_media debugging msgs");
@@ -105,6 +106,7 @@
LIST_REMOVE(entry, ifm_list);
free(entry, M_IFADDR);
}
+ ifm->ifm_cur = NULL;
}
/*
@@ -194,6 +196,21 @@
}
/*
+ * Given a media word, return one suitable for an application
+ * using the original encoding.
+ */
+static int
+compat_media(int media)
+{
+
+ if (IFM_TYPE(media) == IFM_ETHER && IFM_SUBTYPE(media) > IFM_OTHER) {
+ media &= ~(IFM_ETH_XTYPE|IFM_TMASK);
+ media |= IFM_OTHER;
+ }
+ return (media);
+}
+
+/*
* Device-independent media ioctl support function.
*/
int
@@ -272,6 +289,7 @@
* Get list of available media and current media on interface.
*/
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
{
struct ifmedia_entry *ep;
int *kptr, count;
@@ -279,8 +297,13 @@
kptr = NULL; /* XXX gcc */
- ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
- ifm->ifm_cur->ifm_media : IFM_NONE;
+ if (cmd == SIOCGIFMEDIA) {
+ ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+ compat_media(ifm->ifm_cur->ifm_media) : IFM_NONE;
+ } else {
+ ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+ ifm->ifm_cur->ifm_media : IFM_NONE;
+ }
ifmr->ifm_mask = ifm->ifm_mask;
ifmr->ifm_status = 0;
(*ifm->ifm_status)(ifp, ifmr);
@@ -399,8 +422,7 @@
int i;
for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) {
- if ((mword & (IFM_NMASK|IFM_TMASK)) ==
- ifmedia_baudrate_descriptions[i].ifmb_word)
+ if (IFM_TYPE_MATCH(mword, ifmedia_baudrate_descriptions[i].ifmb_word))
return (ifmedia_baudrate_descriptions[i].ifmb_baudrate);
}
@@ -506,7 +528,7 @@
printf("<unknown type>\n");
return;
}
- printf(desc->ifmt_string);
+ printf("%s", desc->ifmt_string);
/* Any mode. */
for (desc = ttos->modes; desc && desc->ifmt_string != NULL; desc++)
Modified: trunk/sys/net/if_media.h
===================================================================
--- trunk/sys/net/if_media.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_media.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
/* $NetBSD: if_media.h,v 1.3 1997/03/26 01:19:27 thorpej Exp $ */
-/* $FreeBSD: stable/9/sys/net/if_media.h 235764 2012-05-22 00:00:17Z jhb $ */
+/* $FreeBSD: stable/10/sys/net/if_media.h 283758 2015-05-29 23:02:12Z erj $ */
/*-
* Copyright (c) 1997
@@ -116,7 +116,7 @@
* ---- -------
* 0-4 Media variant
* 5-7 Media type
- * 8-15 Type specific options
+ * 8-15 Type specific options (includes added variant bits on Ethernet)
* 16-18 Mode (for multi-mode devices)
* 19 RFU
* 20-27 Shared (global) options
@@ -125,8 +125,18 @@
/*
* Ethernet
+ * In order to use more than 31 subtypes, Ethernet uses some of the option
+ * bits as part of the subtype field. See the options section below for
+ * relevant definitions
*/
#define IFM_ETHER 0x00000020
+#define IFM_ETHER_SUBTYPE(x) (((x) & IFM_TMASK) | \
+ (((x) & (IFM_ETH_XTYPE >> IFM_ETH_XSHIFT)) << IFM_ETH_XSHIFT))
+#define IFM_X(x) IFM_ETHER_SUBTYPE(x) /* internal shorthand */
+#define IFM_ETHER_SUBTYPE_SET(x) (IFM_ETHER_SUBTYPE(x) | IFM_ETHER)
+#define IFM_ETHER_SUBTYPE_GET(x) ((x) & (IFM_TMASK|IFM_ETH_XTYPE))
+#define IFM_ETHER_IS_EXTENDED(x) ((x) & IFM_ETH_XTYPE)
+
#define IFM_10_T 3 /* 10BaseT - RJ45 */
#define IFM_10_2 4 /* 10Base2 - Thinnet */
#define IFM_10_5 5 /* 10Base5 - AUI */
@@ -154,12 +164,49 @@
#define IFM_40G_CR4 27 /* 40GBase-CR4 */
#define IFM_40G_SR4 28 /* 40GBase-SR4 */
#define IFM_40G_LR4 29 /* 40GBase-LR4 */
+#define IFM_1000_KX 30 /* 1000Base-KX backplane */
+#define IFM_OTHER 31 /* Other: one of the following */
-/* note 31 is the max! */
+/* following types are not visible to old binaries using only IFM_TMASK */
+#define IFM_10G_KX4 IFM_X(32) /* 10GBase-KX4 backplane */
+#define IFM_10G_KR IFM_X(33) /* 10GBase-KR backplane */
+#define IFM_10G_CR1 IFM_X(34) /* 10GBase-CR1 Twinax splitter */
+#define IFM_20G_KR2 IFM_X(35) /* 20GBase-KR2 backplane */
+#define IFM_2500_KX IFM_X(36) /* 2500Base-KX backplane */
+#define IFM_2500_T IFM_X(37) /* 2500Base-T - RJ45 (NBaseT) */
+#define IFM_5000_T IFM_X(38) /* 5000Base-T - RJ45 (NBaseT) */
+#define IFM_50G_PCIE IFM_X(39) /* 50G Ethernet over PCIE */
+#define IFM_25G_PCIE IFM_X(40) /* 25G Ethernet over PCIE */
+#define IFM_1000_SGMII IFM_X(41) /* 1G media interface */
+#define IFM_10G_SFI IFM_X(42) /* 10G media interface */
+#define IFM_40G_XLPPI IFM_X(43) /* 40G media interface */
+#define IFM_1000_CX_SGMII IFM_X(44) /* 1000Base-CX-SGMII */
+#define IFM_40G_KR4 IFM_X(45) /* 40GBase-KR4 */
+#define IFM_10G_ER IFM_X(46) /* 10GBase-ER */
+#define IFM_100G_CR4 IFM_X(47) /* 100GBase-CR4 */
+#define IFM_100G_SR4 IFM_X(48) /* 100GBase-SR4 */
+#define IFM_100G_KR4 IFM_X(49) /* 100GBase-KR4 */
+#define IFM_100G_LR4 IFM_X(50) /* 100GBase-LR4 */
+#define IFM_56G_R4 IFM_X(51) /* 56GBase-R4 */
+#define IFM_100_T IFM_X(52) /* 100BaseT - RJ45 */
+#define IFM_25G_CR IFM_X(53) /* 25GBase-CR */
+#define IFM_25G_KR IFM_X(54) /* 25GBase-KR */
+#define IFM_25G_SR IFM_X(55) /* 25GBase-SR */
+#define IFM_50G_CR2 IFM_X(56) /* 50GBase-CR2 */
+#define IFM_50G_KR2 IFM_X(57) /* 50GBase-KR2 */
+/*
+ * Please update ieee8023ad_lacp.c:lacp_compose_key()
+ * after adding new Ethernet media types.
+ */
+/* Note IFM_X(511) is the max! */
+
+/* Ethernet option values; includes bits used for extended variant field */
#define IFM_ETH_MASTER 0x00000100 /* master mode (1000baseT) */
#define IFM_ETH_RXPAUSE 0x00000200 /* receive PAUSE frames */
#define IFM_ETH_TXPAUSE 0x00000400 /* transmit PAUSE frames */
+#define IFM_ETH_XTYPE 0x00007800 /* extended media variants */
+#define IFM_ETH_XSHIFT 6 /* shift XTYPE next to TMASK */
/*
* Token ring
@@ -251,11 +298,6 @@
#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
/*
- * CARP Common Address Redundancy Protocol
- */
-#define IFM_CARP 0x000000c0
-
-/*
* Shared media sub-types
*/
#define IFM_AUTO 0 /* Autoselect best media */
@@ -307,7 +349,10 @@
* Macros to extract various bits of information from the media word.
*/
#define IFM_TYPE(x) ((x) & IFM_NMASK)
-#define IFM_SUBTYPE(x) ((x) & IFM_TMASK)
+#define IFM_SUBTYPE(x) \
+ (IFM_TYPE(x) == IFM_ETHER ? IFM_ETHER_SUBTYPE_GET(x) : ((x) & IFM_TMASK))
+#define IFM_TYPE_MATCH(x,y) \
+ (IFM_TYPE(x) == IFM_TYPE(y) && IFM_SUBTYPE(x) == IFM_SUBTYPE(y))
#define IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK)
#define IFM_INST(x) (((x) & IFM_IMASK) >> IFM_ISHIFT)
#define IFM_OPTIONS(x) ((x) & (IFM_OMASK | IFM_GMASK))
@@ -341,7 +386,6 @@
{ IFM_FDDI, "FDDI" }, \
{ IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \
{ IFM_ATM, "ATM" }, \
- { IFM_CARP, "Common Address Redundancy Protocol" }, \
{ 0, NULL }, \
}
@@ -373,6 +417,34 @@
{ IFM_40G_CR4, "40Gbase-CR4" }, \
{ IFM_40G_SR4, "40Gbase-SR4" }, \
{ IFM_40G_LR4, "40Gbase-LR4" }, \
+ { IFM_1000_KX, "1000Base-KX" }, \
+ { IFM_OTHER, "Other" }, \
+ { IFM_10G_KX4, "10GBase-KX4" }, \
+ { IFM_10G_KR, "10GBase-KR" }, \
+ { IFM_10G_CR1, "10GBase-CR1" }, \
+ { IFM_20G_KR2, "20GBase-KR2" }, \
+ { IFM_2500_KX, "2500Base-KX" }, \
+ { IFM_2500_T, "2500Base-T" }, \
+ { IFM_5000_T, "5000Base-T" }, \
+ { IFM_50G_PCIE, "PCIExpress-50G" }, \
+ { IFM_25G_PCIE, "PCIExpress-25G" }, \
+ { IFM_1000_SGMII, "1000Base-SGMII" }, \
+ { IFM_10G_SFI, "10GBase-SFI" }, \
+ { IFM_40G_XLPPI, "40GBase-XLPPI" }, \
+ { IFM_1000_CX_SGMII, "1000Base-CX-SGMII" }, \
+ { IFM_40G_KR4, "40GBase-KR4" }, \
+ { IFM_10G_ER, "10GBase-ER" }, \
+ { IFM_100G_CR4, "100GBase-CR4" }, \
+ { IFM_100G_SR4, "100GBase-SR4" }, \
+ { IFM_100G_KR4, "100GBase-KR4" }, \
+ { IFM_100G_LR4, "100GBase-LR4" }, \
+ { IFM_56G_R4, "56GBase-R4" }, \
+ { IFM_100_T, "100BaseT" }, \
+ { IFM_25G_CR, "25GBase-CR" }, \
+ { IFM_25G_KR, "25GBase-KR" }, \
+ { IFM_25G_SR, "25GBase-SR" }, \
+ { IFM_50G_CR2, "50GBase-CR2" }, \
+ { IFM_50G_KR2, "50GBase-KR2" }, \
{ 0, NULL }, \
}
@@ -674,6 +746,33 @@
{ IFM_ETHER | IFM_40G_CR4, IF_Gbps(40ULL) }, \
{ IFM_ETHER | IFM_40G_SR4, IF_Gbps(40ULL) }, \
{ IFM_ETHER | IFM_40G_LR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_1000_KX, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_10G_KX4, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_KR, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_CR1, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_20G_KR2, IF_Gbps(20ULL) }, \
+ { IFM_ETHER | IFM_2500_KX, IF_Mbps(2500) }, \
+ { IFM_ETHER | IFM_2500_T, IF_Mbps(2500) }, \
+ { IFM_ETHER | IFM_5000_T, IF_Mbps(5000) }, \
+ { IFM_ETHER | IFM_50G_PCIE, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_25G_PCIE, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_1000_SGMII, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_10G_SFI, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_40G_XLPPI, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_1000_CX_SGMII, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_40G_KR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_10G_ER, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_100G_CR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_SR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_KR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_LR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_56G_R4, IF_Gbps(56ULL) }, \
+ { IFM_ETHER | IFM_100_T, IF_Mbps(100ULL) }, \
+ { IFM_ETHER | IFM_25G_CR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_KR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_SR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_50G_CR2, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_KR2, IF_Gbps(50ULL) }, \
\
{ IFM_TOKEN | IFM_TOK_STP4, IF_Mbps(4) }, \
{ IFM_TOKEN | IFM_TOK_STP16, IF_Mbps(16) }, \
@@ -728,8 +827,6 @@
{ "no network", "active" } }, \
{ IFM_ATM, IFM_AVALID, IFM_ACTIVE, \
{ "no network", "active" } }, \
- { IFM_CARP, IFM_AVALID, IFM_ACTIVE, \
- { "backup", "master" } }, \
{ 0, 0, 0, \
{ NULL, NULL } } \
}
Modified: trunk/sys/net/if_mib.c
===================================================================
--- trunk/sys/net/if_mib.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_mib.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_mib.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_mib.c 227309 2011-11-07 15:43:11Z ed $
*/
#include <sys/param.h>
Modified: trunk/sys/net/if_mib.h
===================================================================
--- trunk/sys/net/if_mib.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_mib.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_mib.h 154023 2006-01-04 12:57:09Z harti $
+ * $FreeBSD: stable/10/sys/net/if_mib.h 154023 2006-01-04 12:57:09Z harti $
*/
#ifndef _NET_IF_MIB_H
Added: trunk/sys/net/if_pflog.h
===================================================================
--- trunk/sys/net/if_pflog.h (rev 0)
+++ trunk/sys/net/if_pflog.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,67 @@
+/* $MidnightBSD$ */
+/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */
+/*
+ * Copyright 2001 Niels Provos <provos at citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFLOG_H_
+#define _NET_IF_PFLOG_H_
+
+#define PFLOGIFS_MAX 16
+
+#define PFLOG_RULESET_NAME_SIZE 16
+
+struct pfloghdr {
+ u_int8_t length;
+ sa_family_t af;
+ u_int8_t action;
+ u_int8_t reason;
+ char ifname[IFNAMSIZ];
+ char ruleset[PFLOG_RULESET_NAME_SIZE];
+ u_int32_t rulenr;
+ u_int32_t subrulenr;
+ uid_t uid;
+ pid_t pid;
+ uid_t rule_uid;
+ pid_t rule_pid;
+ u_int8_t dir;
+ u_int8_t pad[3];
+};
+
+#define PFLOG_HDRLEN sizeof(struct pfloghdr)
+/* minus pad, also used as a signature */
+#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad)
+
+#ifdef _KERNEL
+struct pf_rule;
+struct pf_ruleset;
+struct pfi_kif;
+struct pf_pdesc;
+
+#define PFLOG_PACKET(i,a,b,c,d,e,f,g,h,di) do { \
+ if (pflog_packet_ptr != NULL) \
+ pflog_packet_ptr(i,a,b,c,d,e,f,g,h,di); \
+} while (0)
+#endif /* _KERNEL */
+#endif /* _NET_IF_PFLOG_H_ */
Property changes on: trunk/sys/net/if_pflog.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/if_pfsync.h
===================================================================
--- trunk/sys/net/if_pfsync.h (rev 0)
+++ trunk/sys/net/if_pfsync.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,272 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2001 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2008 David Gwynne <dlg at openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $
+ * $FreeBSD: stable/10/sys/net/if_pfsync.h 254925 2013-08-26 18:16:05Z jhb $
+ */
+
+
+#ifndef _NET_IF_PFSYNC_H_
+#define _NET_IF_PFSYNC_H_
+
+#define PFSYNC_VERSION 5
+#define PFSYNC_DFLTTL 255
+
+#define PFSYNC_ACT_CLR 0 /* clear all states */
+#define PFSYNC_ACT_INS 1 /* insert state */
+#define PFSYNC_ACT_INS_ACK 2 /* ack of insterted state */
+#define PFSYNC_ACT_UPD 3 /* update state */
+#define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */
+#define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */
+#define PFSYNC_ACT_DEL 6 /* delete state */
+#define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */
+#define PFSYNC_ACT_INS_F 8 /* insert fragment */
+#define PFSYNC_ACT_DEL_F 9 /* delete fragments */
+#define PFSYNC_ACT_BUS 10 /* bulk update status */
+#define PFSYNC_ACT_TDB 11 /* TDB replay counter update */
+#define PFSYNC_ACT_EOF 12 /* end of frame */
+#define PFSYNC_ACT_MAX 13
+
+/*
+ * A pfsync frame is built from a header followed by several sections which
+ * are all prefixed with their own subheaders. Frames must be terminated with
+ * an EOF subheader.
+ *
+ * | ... |
+ * | IP header |
+ * +============================+
+ * | pfsync_header |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | first action fields |
+ * | ... |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | second action fields |
+ * | ... |
+ * +----------------------------+
+ * | EOF pfsync_subheader |
+ * +----------------------------+
+ * | HMAC |
+ * +============================+
+ */
+
+/*
+ * Frame header
+ */
+
+struct pfsync_header {
+ u_int8_t version;
+ u_int8_t _pad;
+ u_int16_t len;
+ u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH];
+} __packed;
+
+/*
+ * Frame region subheader
+ */
+
+struct pfsync_subheader {
+ u_int8_t action;
+ u_int8_t _pad;
+ u_int16_t count;
+} __packed;
+
+/*
+ * CLR
+ */
+
+struct pfsync_clr {
+ char ifname[IFNAMSIZ];
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * INS, UPD, DEL
+ */
+
+/* these use struct pfsync_state in pfvar.h */
+
+/*
+ * INS_ACK
+ */
+
+struct pfsync_ins_ack {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * UPD_C
+ */
+
+struct pfsync_upd_c {
+ u_int64_t id;
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ u_int32_t creatorid;
+ u_int32_t expire;
+ u_int8_t timeout;
+ u_int8_t _pad[3];
+} __packed;
+
+/*
+ * UPD_REQ
+ */
+
+struct pfsync_upd_req {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * DEL_C
+ */
+
+struct pfsync_del_c {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * INS_F, DEL_F
+ */
+
+/* not implemented (yet) */
+
+/*
+ * BUS
+ */
+
+struct pfsync_bus {
+ u_int32_t creatorid;
+ u_int32_t endtime;
+ u_int8_t status;
+#define PFSYNC_BUS_START 1
+#define PFSYNC_BUS_END 2
+ u_int8_t _pad[3];
+} __packed;
+
+/*
+ * TDB
+ */
+
+struct pfsync_tdb {
+ u_int32_t spi;
+ union sockaddr_union dst;
+ u_int32_t rpl;
+ u_int64_t cur_bytes;
+ u_int8_t sproto;
+ u_int8_t updates;
+ u_int8_t _pad[2];
+} __packed;
+
+#define PFSYNC_HDRLEN sizeof(struct pfsync_header)
+
+/*
+ * Names for PFSYNC sysctl objects
+ */
+#define PFSYNCCTL_STATS 1 /* PFSYNC stats */
+#define PFSYNCCTL_MAXID 2
+
+struct pfsyncstats {
+ u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */
+ u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */
+ u_int64_t pfsyncs_badif; /* not the right interface */
+ u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */
+ u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */
+ u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */
+ u_int64_t pfsyncs_badact; /* bad action */
+ u_int64_t pfsyncs_badlen; /* data length does not match */
+ u_int64_t pfsyncs_badauth; /* bad authentication */
+ u_int64_t pfsyncs_stale; /* stale state */
+ u_int64_t pfsyncs_badval; /* bad values */
+ u_int64_t pfsyncs_badstate; /* insert/lookup failed */
+
+ u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */
+ u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */
+ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */
+ u_int64_t pfsyncs_oerrors; /* ip output error */
+
+ u_int64_t pfsyncs_iacts[PFSYNC_ACT_MAX];
+ u_int64_t pfsyncs_oacts[PFSYNC_ACT_MAX];
+};
+
+/*
+ * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC
+ */
+struct pfsyncreq {
+ char pfsyncr_syncdev[IFNAMSIZ];
+ struct in_addr pfsyncr_syncpeer;
+ int pfsyncr_maxupdates;
+ int pfsyncr_defer;
+};
+
+#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq)
+#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq)
+
+#ifdef _KERNEL
+
+/*
+ * this shows where a pf state is with respect to the syncing.
+ */
+#define PFSYNC_S_INS 0x00
+#define PFSYNC_S_IACK 0x01
+#define PFSYNC_S_UPD 0x02
+#define PFSYNC_S_UPD_C 0x03
+#define PFSYNC_S_DEL 0x04
+#define PFSYNC_S_COUNT 0x05
+
+#define PFSYNC_S_DEFER 0xfe
+#define PFSYNC_S_NONE 0xff
+
+#define PFSYNC_SI_IOCTL 0x01
+#define PFSYNC_SI_CKSUM 0x02
+#define PFSYNC_SI_ACK 0x04
+
+#endif /* _KERNEL */
+
+#endif /* _NET_IF_PFSYNC_H_ */
Property changes on: trunk/sys/net/if_pfsync.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/if_sppp.h
===================================================================
--- trunk/sys/net/if_sppp.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_sppp.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -18,7 +18,7 @@
*
* From: Version 2.0, Fri Oct 6 20:39:21 MSK 1995
*
- * $FreeBSD: stable/9/sys/net/if_sppp.h 147256 2005-06-10 16:49:24Z brooks $
+ * $FreeBSD: stable/10/sys/net/if_sppp.h 147256 2005-06-10 16:49:24Z brooks $
*/
#ifndef _NET_IF_SPPP_H_
Modified: trunk/sys/net/if_spppfr.c
===================================================================
--- trunk/sys/net/if_spppfr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_spppfr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -21,12 +21,12 @@
* works or modified versions.
*
* $Cronyx Id: if_spppfr.c,v 1.1.2.10 2004/06/29 09:02:30 rik Exp $
- * $FreeBSD: stable/9/sys/net/if_spppfr.c 223741 2011-07-03 16:08:38Z bz $
+ * $FreeBSD: stable/10/sys/net/if_spppfr.c 243882 2012-12-05 08:04:20Z glebius $
*/
#include <sys/param.h>
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipx.h"
@@ -46,7 +46,7 @@
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/syslog.h>
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
#include <sys/random.h>
#endif
#include <sys/malloc.h>
@@ -150,7 +150,7 @@
unsigned short ptarget2;
} __packed;
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113
+#if defined(__FreeBSD__) && __FreeBSD_version < 501113
#define SPP_FMT "%s%d: "
#define SPP_ARGS(ifp) (ifp)->if_name, (ifp)->if_unit
#else
@@ -305,7 +305,7 @@
/* Prepend the space for Frame Relay header. */
hlen = (family == AF_INET) ? 4 : 10;
- M_PREPEND (m, hlen, M_DONTWAIT);
+ M_PREPEND (m, hlen, M_NOWAIT);
if (! m)
return 0;
h = mtod (m, u_char*);
@@ -382,7 +382,7 @@
unsigned char *h, *p;
struct mbuf *m;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.rcvif = 0;
@@ -502,7 +502,7 @@
(unsigned char) his_ip_address);
/* Send the Inverse ARP reply. */
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = 10 + sizeof (*reply);
Modified: trunk/sys/net/if_spppsubr.c
===================================================================
--- trunk/sys/net/if_spppsubr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_spppsubr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -19,7 +19,7 @@
*
* From: Version 2.4, Thu Apr 30 17:17:21 MSD 1997
*
- * $FreeBSD: stable/9/sys/net/if_spppsubr.c 249132 2013-04-05 08:22:11Z mav $
+ * $FreeBSD: stable/10/sys/net/if_spppsubr.c 314667 2017-03-04 13:03:31Z avg $
*/
#include <sys/param.h>
@@ -263,7 +263,7 @@
int debug = ifp->if_flags & IFF_DEBUG
static int sppp_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+ const struct sockaddr *dst, struct route *ro);
static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2);
static void sppp_cisco_input(struct sppp *sp, struct mbuf *m);
@@ -630,7 +630,7 @@
* enough leading space in the existing mbuf).
*/
m_adj(m, vjlen);
- M_PREPEND(m, hlen, M_DONTWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL) {
SPPP_UNLOCK(sp);
goto drop2;
@@ -786,19 +786,18 @@
* Enqueue transmit packet.
*/
static int
-sppp_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
struct sppp *sp = IFP2SP(ifp);
struct ppp_header *h;
struct ifqueue *ifq = NULL;
- int s, error, rv = 0;
+ int error, rv = 0;
#ifdef INET
int ipproto = PPP_IP;
#endif
int debug = ifp->if_flags & IFF_DEBUG;
- s = splimp();
SPPP_LOCK(sp);
if (!(ifp->if_flags & IFF_UP) ||
@@ -809,7 +808,6 @@
#endif
m_freem (m);
SPPP_UNLOCK(sp);
- splx (s);
return (ENETDOWN);
}
@@ -833,9 +831,7 @@
* to start LCP for it.
*/
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- splx(s);
lcp.Open(sp);
- s = splimp();
}
#ifdef INET
@@ -859,7 +855,6 @@
{
m_freem(m);
SPPP_UNLOCK(sp);
- splx(s);
if(ip->ip_p == IPPROTO_TCP)
return(EADDRNOTAVAIL);
else
@@ -904,7 +899,6 @@
default:
m_freem(m);
SPPP_UNLOCK(sp);
- splx(s);
return (EINVAL);
}
}
@@ -927,7 +921,7 @@
/*
* Prepend general data packet PPP header. For now, IP only.
*/
- M_PREPEND (m, PPP_HEADER_LEN, M_DONTWAIT);
+ M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT);
if (! m) {
nobufs: if (debug)
log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n",
@@ -934,7 +928,6 @@
SPP_ARGS(ifp));
++ifp->if_oerrors;
SPPP_UNLOCK(sp);
- splx (s);
return (ENOBUFS);
}
/*
@@ -1001,7 +994,6 @@
m_freem (m);
++ifp->if_oerrors;
SPPP_UNLOCK(sp);
- splx (s);
return (EAFNOSUPPORT);
}
@@ -1017,11 +1009,9 @@
if (error) {
++ifp->if_oerrors;
SPPP_UNLOCK(sp);
- splx (s);
return (rv? rv: ENOBUFS);
}
SPPP_UNLOCK(sp);
- splx (s);
/*
* Unlike in sppp_input(), we can always bump the timestamp
* here since sppp_output() is only called on behalf of
@@ -1041,7 +1031,7 @@
mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE);
/* Initialize keepalive handler. */
- callout_init(&sp->keepalive_callout, CALLOUT_MPSAFE);
+ callout_init(&sp->keepalive_callout, 1);
callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
(void *)sp);
@@ -1073,7 +1063,7 @@
#ifdef INET6
sp->confflags |= CONF_ENABLE_IPV6;
#endif
- callout_init(&sp->ifstart_callout, CALLOUT_MPSAFE);
+ callout_init(&sp->ifstart_callout, 1);
sp->if_start = ifp->if_start;
ifp->if_start = sppp_ifstart;
sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK);
@@ -1138,14 +1128,12 @@
sppp_isempty(struct ifnet *ifp)
{
struct sppp *sp = IFP2SP(ifp);
- int empty, s;
+ int empty;
- s = splimp();
SPPP_LOCK(sp);
empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head &&
!SP2IFP(sp)->if_snd.ifq_head;
SPPP_UNLOCK(sp);
- splx(s);
return (empty);
}
@@ -1157,9 +1145,7 @@
{
struct sppp *sp = IFP2SP(ifp);
struct mbuf *m;
- int s;
- s = splimp();
SPPP_LOCK(sp);
/*
* Process only the control protocol queue until we have at
@@ -1176,7 +1162,6 @@
IF_DEQUEUE (&SP2IFP(sp)->if_snd, m);
}
SPPP_UNLOCK(sp);
- splx(s);
return m;
}
@@ -1188,9 +1173,7 @@
{
struct sppp *sp = IFP2SP(ifp);
struct mbuf *m;
- int s;
- s = splimp ();
SPPP_LOCK(sp);
m = sp->pp_cpq.ifq_head;
@@ -1201,7 +1184,6 @@
if ((m = sp->pp_fastq.ifq_head) == NULL)
m = SP2IFP(sp)->if_snd.ifq_head;
SPPP_UNLOCK(sp);
- splx (s);
return (m);
}
@@ -1213,14 +1195,12 @@
{
struct ifreq *ifr = (struct ifreq*) data;
struct sppp *sp = IFP2SP(ifp);
- int s, rv, going_up, going_down, newmode;
+ int rv, going_up, going_down, newmode;
- s = splimp();
SPPP_LOCK(sp);
rv = 0;
switch (cmd) {
case SIOCAIFADDR:
- case SIOCSIFDSTADDR:
break;
case SIOCSIFADDR:
@@ -1321,7 +1301,6 @@
rv = ENOTTY;
}
SPPP_UNLOCK(sp);
- splx(s);
return rv;
}
@@ -1413,7 +1392,7 @@
getmicrouptime(&tv);
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN;
@@ -1461,7 +1440,7 @@
if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN)
len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
@@ -2071,9 +2050,7 @@
sppp_to_event(const struct cp *cp, struct sppp *sp)
{
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n",
@@ -2123,7 +2100,6 @@
}
SPPP_UNLOCK(sp);
- splx(s);
}
/*
@@ -2195,7 +2171,7 @@
sp->lcp.max_terminate = 2;
sp->lcp.max_configure = 10;
sp->lcp.max_failure = 10;
- callout_init(&sp->ch[IDX_LCP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_LCP], 1);
}
static void
@@ -2886,7 +2862,7 @@
sp->fail_counter[IDX_IPCP] = 0;
sp->pp_seq[IDX_IPCP] = 0;
sp->pp_rseq[IDX_IPCP] = 0;
- callout_init(&sp->ch[IDX_IPCP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_IPCP], 1);
}
static void
@@ -3445,7 +3421,7 @@
sp->fail_counter[IDX_IPV6CP] = 0;
sp->pp_seq[IDX_IPV6CP] = 0;
sp->pp_rseq[IDX_IPV6CP] = 0;
- callout_init(&sp->ch[IDX_IPV6CP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_IPV6CP], 1);
}
static void
@@ -3620,7 +3596,7 @@
continue;
}
- bzero(&suggestaddr, sizeof(&suggestaddr));
+ bzero(&suggestaddr, sizeof(suggestaddr));
if (collision && nohisaddr) {
/* collision, hisaddr unknown - Conf-Rej */
type = CONF_REJ;
@@ -4026,7 +4002,7 @@
{
STDDCL;
struct lcp_header *h;
- int len, x;
+ int len;
u_char *value, *name, digest[AUTHKEYLEN], dsize;
int value_len, name_len;
MD5_CTX ctx;
@@ -4103,7 +4079,6 @@
}
log(-1, "\n");
}
- x = splimp();
SPPP_LOCK(sp);
sp->pp_flags &= ~PP_NEEDAUTH;
if (sp->myauth.proto == PPP_CHAP &&
@@ -4115,11 +4090,9 @@
* to network phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
break;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
break;
@@ -4253,7 +4226,7 @@
sp->fail_counter[IDX_CHAP] = 0;
sp->pp_seq[IDX_CHAP] = 0;
sp->pp_rseq[IDX_CHAP] = 0;
- callout_init(&sp->ch[IDX_CHAP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_CHAP], 1);
}
static void
@@ -4281,9 +4254,7 @@
{
struct sppp *sp = (struct sppp *)cookie;
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n",
@@ -4314,7 +4285,6 @@
}
SPPP_UNLOCK(sp);
- splx(s);
}
static void
@@ -4321,7 +4291,7 @@
sppp_chap_tlu(struct sppp *sp)
{
STDDCL;
- int i, x;
+ int i;
i = 0;
sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
@@ -4352,7 +4322,6 @@
log(-1, "re-challenging supressed\n");
}
- x = splimp();
SPPP_LOCK(sp);
/* indicate to LCP that we need to be closed down */
sp->lcp.protos |= (1 << IDX_CHAP);
@@ -4364,11 +4333,9 @@
* phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
return;
}
SPPP_UNLOCK(sp);
- splx(x);
/*
* If we are already in phase network, we are done here. This
@@ -4437,7 +4404,7 @@
{
STDDCL;
struct lcp_header *h;
- int len, x;
+ int len;
u_char *name, *passwd, mlen;
int name_len, passwd_len;
@@ -4524,7 +4491,6 @@
}
log(-1, "\n");
}
- x = splimp();
SPPP_LOCK(sp);
sp->pp_flags &= ~PP_NEEDAUTH;
if (sp->myauth.proto == PPP_PAP &&
@@ -4536,11 +4502,9 @@
* to network phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
break;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
break;
@@ -4584,8 +4548,8 @@
sp->fail_counter[IDX_PAP] = 0;
sp->pp_seq[IDX_PAP] = 0;
sp->pp_rseq[IDX_PAP] = 0;
- callout_init(&sp->ch[IDX_PAP], CALLOUT_MPSAFE);
- callout_init(&sp->pap_my_to_ch, CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_PAP], 1);
+ callout_init(&sp->pap_my_to_ch, 1);
}
static void
@@ -4621,9 +4585,7 @@
{
struct sppp *sp = (struct sppp *)cookie;
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n",
@@ -4649,7 +4611,6 @@
}
SPPP_UNLOCK(sp);
- splx(s);
}
/*
@@ -4676,7 +4637,6 @@
sppp_pap_tlu(struct sppp *sp)
{
STDDCL;
- int x;
sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
@@ -4684,7 +4644,6 @@
log(LOG_DEBUG, SPP_FMT "%s tlu\n",
SPP_ARGS(ifp), pap.name);
- x = splimp();
SPPP_LOCK(sp);
/* indicate to LCP that we need to be closed down */
sp->lcp.protos |= (1 << IDX_PAP);
@@ -4696,11 +4655,9 @@
* phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
return;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
}
@@ -4765,7 +4722,7 @@
const char *msg;
va_list ap;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.rcvif = 0;
@@ -4822,7 +4779,7 @@
n = ifq->ifq_head;
while ((m = n)) {
- n = m->m_act;
+ n = m->m_nextpkt;
m_freem (m);
}
ifq->ifq_head = 0;
@@ -4838,9 +4795,7 @@
{
struct sppp *sp = (struct sppp*)dummy;
struct ifnet *ifp = SP2IFP(sp);
- int s;
- s = splimp();
SPPP_LOCK(sp);
/* Keepalive mode disabled or channel down? */
if (! (sp->pp_flags & PP_KEEPALIVE) ||
@@ -4883,7 +4838,6 @@
}
out:
SPPP_UNLOCK(sp);
- splx(s);
callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
(void *)sp);
}
@@ -4933,7 +4887,7 @@
#ifdef INET
/*
- * Set my IP address. Must be called at splimp.
+ * Set my IP address.
*/
static void
sppp_set_ip_addr(struct sppp *sp, u_long src)
@@ -5050,7 +5004,7 @@
}
/*
- * Set my IPv6 address. Must be called at splimp.
+ * Set my IPv6 address.
*/
static void
sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src)
@@ -5132,7 +5086,8 @@
* Check the cmd word first before attempting to fetch all the
* data.
*/
- if ((subcmd = fuword(ifr->ifr_data)) == -1) {
+ rv = fueword(ifr->ifr_data, &subcmd);
+ if (rv == -1) {
rv = EFAULT;
goto quit;
}
Modified: trunk/sys/net/if_stf.c
===================================================================
--- trunk/sys/net/if_stf.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_stf.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/if_stf.c 248743 2013-03-26 18:57:25Z melifaro $ */
+/* $FreeBSD: stable/10/sys/net/if_stf.c 275828 2014-12-16 11:53:45Z ae $ */
/* $KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $ */
/*-
@@ -133,7 +133,6 @@
SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN,
&stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses");
-#define STFNAME "stf"
#define STFUNIT 0
#define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002)
@@ -142,7 +141,7 @@
* XXX: Return a pointer with 16-bit aligned. Don't cast it to
* struct in_addr *; use bcopy() instead.
*/
-#define GET_V4(x) ((caddr_t)(&(x)->s6_addr16[1]))
+#define GET_V4(x) (&(x)->s6_addr16[1])
struct stf_softc {
struct ifnet *sc_ifp;
@@ -157,11 +156,13 @@
};
#define STF2IFP(sc) ((sc)->sc_ifp)
+static const char stfname[] = "stf";
+
/*
* Note that mutable fields in the softc are not currently locked.
* We do lock sc_ro in stf_output though.
*/
-static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
+static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
static const int ip_stf_ttl = 40;
extern struct domain inetdomain;
@@ -181,7 +182,7 @@
static int stfmodevent(module_t, int, void *);
static int stf_encapcheck(const struct mbuf *, int, int, void *);
static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *);
-static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static int isrfc1918addr(struct in_addr *);
static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
@@ -194,8 +195,7 @@
static int stf_clone_match(struct if_clone *, const char *);
static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int stf_clone_destroy(struct if_clone *, struct ifnet *);
-struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0,
- NULL, stf_clone_match, stf_clone_create, stf_clone_destroy);
+static struct if_clone *stf_cloner;
static int
stf_clone_match(struct if_clone *ifc, const char *name)
@@ -242,7 +242,7 @@
* we don't conform to the default naming convention for interfaces.
*/
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = stfname;
ifp->if_dunit = IF_DUNIT_NONE;
mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF);
@@ -292,10 +292,11 @@
switch (type) {
case MOD_LOAD:
- if_clone_attach(&stf_cloner);
+ stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match,
+ stf_clone_create, stf_clone_destroy);
break;
case MOD_UNLOAD:
- if_clone_detach(&stf_cloner);
+ if_clone_detach(stf_cloner);
break;
default:
return (EOPNOTSUPP);
@@ -413,23 +414,19 @@
}
static int
-stf_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
struct stf_softc *sc;
- struct sockaddr_in6 *dst6;
+ const struct sockaddr_in6 *dst6;
struct route *cached_route;
struct in_addr in4;
- caddr_t ptr;
+ const void *ptr;
struct sockaddr_in *dst4;
u_int8_t tos;
struct ip *ip;
struct ip6_hdr *ip6;
struct in6_ifaddr *ia6;
- u_int32_t af;
int error;
#ifdef MAC
@@ -441,7 +438,7 @@
#endif
sc = ifp->if_softc;
- dst6 = (struct sockaddr_in6 *)dst;
+ dst6 = (const struct sockaddr_in6 *)dst;
/* just in case */
if ((ifp->if_flags & IFF_UP) == 0) {
@@ -474,15 +471,6 @@
tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
/*
- * BPF writes need to be handled specially.
- * This is a null operation, nothing here checks dst->sa_family.
- */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- /*
* Pickup the right outer dst addr from the list of candidates.
* ip6_dst has priority as it may be able to give us shorter IPv4 hops.
*/
@@ -507,13 +495,11 @@
* will only read from the mbuf (i.e., it won't
* try to free it or keep a pointer a to it).
*/
- af = AF_INET6;
+ u_int af = AF_INET6;
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
}
- M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct ip))
- m = m_pullup(m, sizeof(struct ip));
+ M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
if (m == NULL) {
ifa_free(&ia6->ia_ifa);
ifp->if_oerrors++;
@@ -529,7 +515,7 @@
bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
ip->ip_p = IPPROTO_IPV6;
ip->ip_ttl = ip_stf_ttl;
- ip->ip_len = m->m_pkthdr.len; /*host order*/
+ ip->ip_len = htons(m->m_pkthdr.len);
if (ifp->if_flags & IFF_LINK1)
ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
else
@@ -625,10 +611,7 @@
* reject packets with broadcast
*/
IN_IFADDR_RLOCK();
- for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead);
- ia4;
- ia4 = TAILQ_NEXT(ia4, ia_link))
- {
+ TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
continue;
if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
@@ -800,7 +783,7 @@
struct rt_addrinfo *info;
{
RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+ rt->rt_mtu = rt->rt_ifp->if_mtu;
}
static int
Modified: trunk/sys/net/if_stf.h
===================================================================
--- trunk/sys/net/if_stf.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_stf.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/if_stf.h 139823 2005-01-07 01:45:51Z imp $ */
+/* $FreeBSD: stable/10/sys/net/if_stf.h 139823 2005-01-07 01:45:51Z imp $ */
/* $KAME: if_stf.h,v 1.5 2001/10/12 10:09:17 keiichi Exp $ */
/*-
Modified: trunk/sys/net/if_tap.c
===================================================================
--- trunk/sys/net/if_tap.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tap.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -32,7 +32,7 @@
*/
/*
- * $FreeBSD: stable/9/sys/net/if_tap.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_tap.c 326692 2017-12-08 15:26:57Z hselasky $
* $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
*/
@@ -80,8 +80,8 @@
#define CDEV_NAME "tap"
#define TAPDEBUG if (tapdebug) printf
-#define TAP "tap"
-#define VMNET "vmnet"
+static const char tapname[] = "tap";
+static const char vmnetname[] = "vmnet";
#define TAPMAXUNIT 0x7fff
#define VMNET_DEV_MASK CLONE_FLAG0
@@ -100,12 +100,11 @@
static int tap_clone_create(struct if_clone *, int, caddr_t);
static void tap_clone_destroy(struct ifnet *);
+static struct if_clone *tap_cloner;
static int vmnet_clone_create(struct if_clone *, int, caddr_t);
static void vmnet_clone_destroy(struct ifnet *);
+static struct if_clone *vmnet_cloner;
-IFC_SIMPLE_DECLARE(tap, 0);
-IFC_SIMPLE_DECLARE(vmnet, 0);
-
/* character device */
static d_open_t tapopen;
static d_close_t tapclose;
@@ -136,7 +135,7 @@
static struct cdevsw tap_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_flags = D_NEEDMINOR,
.d_open = tapopen,
.d_close = tapclose,
.d_read = tapread,
@@ -184,18 +183,12 @@
{
struct cdev *dev;
int i;
- int extra;
- if (strcmp(ifc->ifc_name, VMNET) == 0)
- extra = VMNET_DEV_MASK;
- else
- extra = 0;
-
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra);
+ /* Find any existing device, or allocate new unit number. */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0);
if (i) {
- dev = make_dev(&tap_cdevsw, unit | extra,
- UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit);
+ dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
+ "%s%d", tapname, unit);
}
tapcreate(dev);
@@ -206,7 +199,18 @@
static int
vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
- return tap_clone_create(ifc, unit, params);
+ struct cdev *dev;
+ int i;
+
+ /* Find any existing device, or allocate new unit number. */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK);
+ if (i) {
+ dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT,
+ GID_WHEEL, 0600, "%s%d", vmnetname, unit);
+ }
+
+ tapcreate(dev);
+ return (0);
}
static void
@@ -214,16 +218,13 @@
{
struct ifnet *ifp = tp->tap_ifp;
- /* Unlocked read. */
- KASSERT(!(tp->tap_flags & TAP_OPEN),
- ("%s flags is out of sync", ifp->if_xname));
-
CURVNET_SET(ifp->if_vnet);
+ destroy_dev(tp->tap_dev);
seldrain(&tp->tap_rsel);
+ knlist_clear(&tp->tap_rsel.si_note, 0);
knlist_destroy(&tp->tap_rsel.si_note);
- destroy_dev(tp->tap_dev);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
mtx_destroy(&tp->tap_mtx);
free(tp, M_TAP);
@@ -275,8 +276,10 @@
mtx_destroy(&tapmtx);
return (ENOMEM);
}
- if_clone_attach(&tap_cloner);
- if_clone_attach(&vmnet_cloner);
+ tap_cloner = if_clone_simple(tapname, tap_clone_create,
+ tap_clone_destroy, 0);
+ vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create,
+ vmnet_clone_destroy, 0);
return (0);
case MOD_UNLOAD:
@@ -298,8 +301,8 @@
mtx_unlock(&tapmtx);
EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
- if_clone_detach(&tap_cloner);
- if_clone_detach(&vmnet_cloner);
+ if_clone_detach(tap_cloner);
+ if_clone_detach(vmnet_cloner);
drain_dev_clone_events();
mtx_lock(&tapmtx);
@@ -353,13 +356,13 @@
extra = 0;
/* We're interested in only tap/vmnet devices. */
- if (strcmp(name, TAP) == 0) {
+ if (strcmp(name, tapname) == 0) {
unit = -1;
- } else if (strcmp(name, VMNET) == 0) {
+ } else if (strcmp(name, vmnetname) == 0) {
unit = -1;
extra = VMNET_DEV_MASK;
- } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) {
- if (dev_stdclone(name, NULL, VMNET, &unit) != 1) {
+ } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) {
+ if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) {
return;
} else {
extra = VMNET_DEV_MASK;
@@ -405,11 +408,9 @@
unsigned short macaddr_hi;
uint32_t macaddr_mid;
int unit;
- char *name = NULL;
+ const char *name = NULL;
u_char eaddr[6];
- dev->si_flags &= ~SI_CHEAPCLONE;
-
/* allocate driver storage and create device */
tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
@@ -421,10 +422,10 @@
/* select device: tap or vmnet */
if (unit & VMNET_DEV_MASK) {
- name = VMNET;
+ name = vmnetname;
tp->tap_flags |= TAP_VMNET;
} else
- name = TAP;
+ name = tapname;
unit &= TAPMAXUNIT;
@@ -736,9 +737,10 @@
switch (cmd) {
case TAPSIFINFO:
tapp = (struct tapinfo *)data;
+ if (ifp->if_type != tapp->type)
+ return (EPROTOTYPE);
mtx_lock(&tp->tap_mtx);
ifp->if_mtu = tapp->mtu;
- ifp->if_type = tapp->type;
ifp->if_baudrate = tapp->baudrate;
mtx_unlock(&tp->tap_mtx);
break;
@@ -947,7 +949,7 @@
return (EIO);
}
- if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN,
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN,
M_PKTHDR)) == NULL) {
ifp->if_ierrors ++;
return (ENOBUFS);
Modified: trunk/sys/net/if_tap.h
===================================================================
--- trunk/sys/net/if_tap.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tap.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -32,7 +32,7 @@
*/
/*
- * $FreeBSD: stable/9/sys/net/if_tap.h 182880 2008-09-08 22:43:55Z emax $
+ * $FreeBSD: stable/10/sys/net/if_tap.h 182880 2008-09-08 22:43:55Z emax $
* $Id: if_tap.h,v 0.7 2000/07/12 04:12:51 max Exp $
*/
Modified: trunk/sys/net/if_tapvar.h
===================================================================
--- trunk/sys/net/if_tapvar.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tapvar.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
*/
/*
- * $FreeBSD: stable/9/sys/net/if_tapvar.h 147256 2005-06-10 16:49:24Z brooks $
+ * $FreeBSD: stable/10/sys/net/if_tapvar.h 240942 2012-09-25 23:41:45Z emaste $
* $Id: if_tapvar.h,v 0.6 2000/07/11 02:16:08 max Exp $
*/
Modified: trunk/sys/net/if_tun.c
===================================================================
--- trunk/sys/net/if_tun.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tun.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -14,7 +14,7 @@
* UCL. This driver is based much more on read/write/poll mode of
* operation though.
*
- * $FreeBSD: stable/9/sys/net/if_tun.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/if_tun.c 326692 2017-12-08 15:26:57Z hselasky $
*/
#include "opt_atalk.h"
@@ -100,7 +100,6 @@
#define TUN2IFP(sc) ((sc)->tun_ifp)
#define TUNDEBUG if (tundebug) if_printf
-#define TUNNAME "tun"
/*
* All mutable global variables in if_tun are locked using tunmtx, with
@@ -108,7 +107,8 @@
* which is static after setup.
*/
static struct mtx tunmtx;
-static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
+static const char tunname[] = "tun";
+static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;
static int tundclone = 1;
static struct clonedevs *tunclones;
@@ -129,15 +129,14 @@
static int tunifioctl(struct ifnet *, u_long, caddr_t);
static void tuninit(struct ifnet *);
static int tunmodevent(module_t, int, void *);
-static int tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *ro);
+static int tunoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *ro);
static void tunstart(struct ifnet *);
static int tun_clone_create(struct if_clone *, int, caddr_t);
static void tun_clone_destroy(struct ifnet *);
+static struct if_clone *tun_cloner;
-IFC_SIMPLE_DECLARE(tun, 0);
-
static d_open_t tunopen;
static d_close_t tunclose;
static d_read_t tunread;
@@ -166,7 +165,7 @@
static struct cdevsw tun_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_flags = D_NEEDMINOR,
.d_open = tunopen,
.d_close = tunclose,
.d_read = tunread,
@@ -174,7 +173,7 @@
.d_ioctl = tunioctl,
.d_poll = tunpoll,
.d_kqfilter = tunkqfilter,
- .d_name = TUNNAME,
+ .d_name = tunname,
};
static int
@@ -188,9 +187,9 @@
if (i) {
/* No preexisting struct cdev *, create one */
dev = make_dev(&tun_cdevsw, unit,
- UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit);
+ UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
}
- tuncreate(ifc->ifc_name, dev);
+ tuncreate(tunname, dev);
return (0);
}
@@ -212,9 +211,9 @@
if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
return;
- if (strcmp(name, TUNNAME) == 0) {
+ if (strcmp(name, tunname) == 0) {
u = -1;
- } else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
+ } else if (dev_stdclone(name, NULL, tunname, &u) != 1)
return; /* Don't recognise the name */
if (u != -1 && u > IF_MAXUNIT)
return; /* Unit number too high */
@@ -247,7 +246,6 @@
{
struct cdev *dev;
- /* Unlocked read. */
mtx_lock(&tp->tun_mtx);
if ((tp->tun_flags & TUN_OPEN) != 0)
cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
@@ -261,6 +259,7 @@
if_free(TUN2IFP(tp));
destroy_dev(dev);
seldrain(&tp->tun_rsel);
+ knlist_clear(&tp->tun_rsel.si_note, 0);
knlist_destroy(&tp->tun_rsel.si_note);
mtx_destroy(&tp->tun_mtx);
cv_destroy(&tp->tun_cv);
@@ -292,10 +291,11 @@
tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
if (tag == NULL)
return (ENOMEM);
- if_clone_attach(&tun_cloner);
+ tun_cloner = if_clone_simple(tunname, tun_clone_create,
+ tun_clone_destroy, 0);
break;
case MOD_UNLOAD:
- if_clone_detach(&tun_cloner);
+ if_clone_detach(tun_cloner);
EVENTHANDLER_DEREGISTER(dev_clone, tag);
drain_dev_clone_events();
@@ -323,6 +323,7 @@
};
DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_tun, 1);
static void
tunstart(struct ifnet *ifp)
@@ -362,8 +363,6 @@
struct tun_softc *sc;
struct ifnet *ifp;
- dev->si_flags &= ~SI_CHEAPCLONE;
-
sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
cv_init(&sc->tun_cv, "tun_condvar");
@@ -410,7 +409,7 @@
*/
tp = dev->si_drv1;
if (!tp) {
- tuncreate(TUNNAME, dev);
+ tuncreate(tunname, dev);
tp = dev->si_drv1;
}
@@ -555,10 +554,6 @@
tuninit(ifp);
TUNDEBUG(ifp, "address set\n");
break;
- case SIOCSIFDSTADDR:
- tuninit(ifp);
- TUNDEBUG(ifp, "destination address set\n");
- break;
case SIOCSIFMTU:
ifp->if_mtu = ifr->ifr_mtu;
TUNDEBUG(ifp, "mtu set\n");
@@ -577,7 +572,7 @@
* tunoutput - queue packets from higher level ready to put out.
*/
static int
-tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
struct route *ro)
{
struct tun_softc *tp = ifp->if_softc;
@@ -611,20 +606,18 @@
}
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
- if (bpf_peers_present(ifp->if_bpf)) {
- af = dst->sa_family;
+ if (bpf_peers_present(ifp->if_bpf))
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
- }
/* prepend sockaddr? this may abort if the mbuf allocation fails */
if (cached_tun_flags & TUN_LMODE) {
/* allocate space for sockaddr */
- M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
+ M_PREPEND(m0, dst->sa_len, M_NOWAIT);
/* if allocation failed drop packet */
if (m0 == NULL) {
@@ -638,7 +631,7 @@
if (cached_tun_flags & TUN_IFHEAD) {
/* Prepend the address family */
- M_PREPEND(m0, 4, M_DONTWAIT);
+ M_PREPEND(m0, 4, M_NOWAIT);
/* if allocation failed drop packet */
if (m0 == NULL) {
@@ -646,10 +639,10 @@
ifp->if_oerrors++;
return (ENOBUFS);
} else
- *(u_int32_t *)m0->m_data = htonl(dst->sa_family);
+ *(u_int32_t *)m0->m_data = htonl(af);
} else {
#ifdef INET
- if (dst->sa_family != AF_INET)
+ if (af != AF_INET)
#endif
{
m_freem(m0);
@@ -685,9 +678,10 @@
if (error)
return (error);
}
+ if (TUN2IFP(tp)->if_type != tunp->type)
+ return (EPROTOTYPE);
mtx_lock(&tp->tun_mtx);
TUN2IFP(tp)->if_mtu = tunp->mtu;
- TUN2IFP(tp)->if_type = tunp->type;
TUN2IFP(tp)->if_baudrate = tunp->baudrate;
mtx_unlock(&tp->tun_mtx);
break;
@@ -874,7 +868,7 @@
return (EIO);
}
- if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
ifp->if_ierrors++;
return (ENOBUFS);
}
@@ -925,9 +919,8 @@
m_freem(m);
return (EAFNOSUPPORT);
}
- /* First chunk of an mbuf contains good junk */
if (harvest.point_to_point)
- random_harvest(m, 16, 3, 0, RANDOM_NET);
+ random_harvest(&(m->m_data), 12, 2, RANDOM_NET_TUN);
ifp->if_ibytes += m->m_pkthdr.len;
ifp->if_ipackets++;
CURVNET_SET(ifp->if_vnet);
Modified: trunk/sys/net/if_tun.h
===================================================================
--- trunk/sys/net/if_tun.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_tun.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -14,7 +14,7 @@
* UCL. This driver is based much more on read/write/select mode of
* operation though.
*
- * $FreeBSD: stable/9/sys/net/if_tun.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/if_tun.h 139823 2005-01-07 01:45:51Z imp $
*/
#ifndef _NET_IF_TUN_H_
Modified: trunk/sys/net/if_types.h
===================================================================
--- trunk/sys/net/if_types.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_types.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if_types.h 8.3 (Berkeley) 4/28/95
- * $FreeBSD: stable/9/sys/net/if_types.h 219819 2011-03-21 09:40:01Z jeff $
+ * $FreeBSD: stable/10/sys/net/if_types.h 228571 2011-12-16 12:16:56Z glebius $
* $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $
*/
@@ -251,6 +251,5 @@
#define IFT_ENC 0xf4
#define IFT_PFLOG 0xf6
#define IFT_PFSYNC 0xf7
-#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
#define IFT_IPXIP 0xf9 /* IPX over IP tunneling; no longer used. */
#endif /* !_NET_IF_TYPES_H_ */
Modified: trunk/sys/net/if_var.h
===================================================================
--- trunk/sys/net/if_var.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_var.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* From: @(#)if.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: release/9.2.0/sys/net/if_var.h 252781 2013-07-05 13:48:32Z andre $
+ * $FreeBSD: stable/10/sys/net/if_var.h 318505 2017-05-18 23:41:34Z rpokala $
*/
#ifndef _NET_IF_VAR_H_
@@ -70,6 +70,7 @@
struct socket;
struct ether_header;
struct carp_if;
+struct carp_softc;
struct ifvlantrunk;
struct route;
struct vnet;
@@ -96,10 +97,30 @@
TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
-TAILQ_HEAD(ifprefixhead, ifprefix);
TAILQ_HEAD(ifmultihead, ifmultiaddr);
TAILQ_HEAD(ifgrouphead, ifg_group);
+#ifdef _KERNEL
+VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */
+#define V_link_pfil_hook VNET(link_pfil_hook)
+#endif /* _KERNEL */
+
+typedef enum {
+ IFCOUNTER_IPACKETS = 0,
+ IFCOUNTER_IERRORS,
+ IFCOUNTER_OPACKETS,
+ IFCOUNTER_OERRORS,
+ IFCOUNTER_COLLISIONS,
+ IFCOUNTER_IBYTES,
+ IFCOUNTER_OBYTES,
+ IFCOUNTER_IMCASTS,
+ IFCOUNTER_OMCASTS,
+ IFCOUNTER_IQDROPS,
+ IFCOUNTER_OQDROPS,
+ IFCOUNTER_NOPROTO,
+ IFCOUNTERS /* Array size. */
+} ift_counter;
+
/*
* Structure defining a queue for a network interface.
*/
@@ -112,6 +133,12 @@
struct mtx ifq_mtx;
};
+struct ifnet_hw_tsomax {
+ u_int tsomaxbytes; /* TSO total burst length limit in bytes */
+ u_int tsomaxsegcount; /* TSO maximum segment count */
+ u_int tsomaxsegsize; /* TSO maximum segment size in bytes */
+};
+
/*
* Structure defining a network interface.
*
@@ -154,7 +181,7 @@
int if_amcount; /* number of all-multicast requests */
/* procedure handles */
int (*if_output) /* output routine (enqueue) */
- (struct ifnet *, struct mbuf *, struct sockaddr *,
+ (struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
void (*if_input) /* input routine (from h/w driver) */
(struct ifnet *, struct mbuf *);
@@ -184,16 +211,16 @@
struct label *if_label; /* interface MAC label */
/* these are only used by IPv6 */
- struct ifprefixhead if_prefixhead; /* list of prefixes per if */
+ void *if_unused[2];
void *if_afdata[AF_MAX];
int if_afdata_initialized;
struct rwlock if_afdata_lock;
struct task if_linktask; /* task for link change events */
- struct mtx if_addr_mtx; /* mutex to protect address lists */
+ struct rwlock if_addr_lock; /* lock to protect address lists */
LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
- /* protected by if_addr_mtx */
+ /* protected by if_addr_lock */
void *if_pf_kif;
void *if_lagg; /* lagg glue */
char *if_description; /* interface description */
@@ -201,17 +228,40 @@
u_char if_alloctype; /* if_type at time of allocation */
/*
+ * Network adapter TSO limits:
+ * ===========================
+ *
+ * If the "if_hw_tsomax" field is zero the maximum segment
+ * length limit does not apply. If the "if_hw_tsomaxsegcount"
+ * or the "if_hw_tsomaxsegsize" field is zero the TSO segment
+ * count limit does not apply. If all three fields are zero,
+ * there is no TSO limit.
+ *
+ * NOTE: The TSO limits should reflect the values used in the
+ * BUSDMA tag a network adapter is using to load a mbuf chain
+ * for transmission. The TCP/IP network stack will subtract
+ * space for all linklevel and protocol level headers and
+ * ensure that the full mbuf chain passed to the network
+ * adapter fits within the given limits.
+ */
+ u_int if_hw_tsomax;
+
+ /*
* Spare fields are added so that we can modify sensitive data
* structures without changing the kernel binary interface, and must
* be used with care where binary compatibility is required.
*/
char if_cspare[3];
- u_int if_hw_tsomax; /* tso burst length limit, the minmum
- * is (IP_MAXPACKET / 8).
- * XXXAO: Have to find a better place
- * for it eventually. */
- int if_ispare[3];
- void *if_pspare[8]; /* 1 netmap, 7 TDB */
+ int if_ispare[2];
+
+ /*
+ * TSO fields for segment limits. If a field is zero below,
+ * there is no limit:
+ */
+ u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */
+ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */
+ void *if_pspare[7]; /* 1 netmap, 6 TDB */
+ void *if_hw_addr; /* hardware link-level address */
};
typedef void if_init_f_t(void *);
@@ -228,6 +278,7 @@
#define if_metric if_data.ifi_metric
#define if_link_state if_data.ifi_link_state
#define if_baudrate if_data.ifi_baudrate
+#define if_baudrate_pf if_data.ifi_baudrate_pf
#define if_hwassist if_data.ifi_hwassist
#define if_ipackets if_data.ifi_ipackets
#define if_ierrors if_data.ifi_ierrors
@@ -250,18 +301,14 @@
/*
* Locks for address lists on the network interface.
*/
-#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \
- "if_addr_mtx", NULL, MTX_DEF)
-#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx)
-#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_mtx)
-#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
-#define IF_ADDR_RLOCK(if) mtx_lock(&(if)->if_addr_mtx)
-#define IF_ADDR_RUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
-#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-/* XXX: Compat. */
-#define IF_ADDR_LOCK(if) IF_ADDR_WLOCK(if)
-#define IF_ADDR_UNLOCK(if) IF_ADDR_WUNLOCK(if)
+#define IF_ADDR_LOCK_INIT(if) rw_init(&(if)->if_addr_lock, "if_addr_lock")
+#define IF_ADDR_LOCK_DESTROY(if) rw_destroy(&(if)->if_addr_lock)
+#define IF_ADDR_WLOCK(if) rw_wlock(&(if)->if_addr_lock)
+#define IF_ADDR_WUNLOCK(if) rw_wunlock(&(if)->if_addr_lock)
+#define IF_ADDR_RLOCK(if) rw_rlock(&(if)->if_addr_lock)
+#define IF_ADDR_RUNLOCK(if) rw_runlock(&(if)->if_addr_lock)
+#define IF_ADDR_LOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_LOCKED)
+#define IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED)
/*
* Function variations on locking macros intended to be used by loadable
@@ -277,7 +324,7 @@
* Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
* are queues of messages stored on ifqueue structures
* (defined above). Entries are added to and deleted from these structures
- * by these macros, which should be called with ipl raised to splimp().
+ * by these macros.
*/
#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
@@ -379,6 +426,11 @@
/* Interface link state change event */
typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int);
EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
+/* Interface up/down event */
+#define IFNET_EVENT_UP 0
+#define IFNET_EVENT_DOWN 1
+typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event);
+EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn);
/*
* interface groups
@@ -594,6 +646,18 @@
} while (0)
#ifdef _KERNEL
+static __inline void
+if_initbaudrate(struct ifnet *ifp, uintmax_t baud)
+{
+
+ ifp->if_baudrate_pf = 0;
+ while (baud > (u_long)(~0UL)) {
+ baud /= 10;
+ ifp->if_baudrate_pf++;
+ }
+ ifp->if_baudrate = baud;
+}
+
static __inline int
drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
{
@@ -648,7 +712,7 @@
return (m);
}
#endif
- return(buf_ring_peek(br));
+ return(buf_ring_peek_clear_sc(br));
}
static __inline void
@@ -781,6 +845,7 @@
struct sockaddr *ifa_netmask; /* used to determine subnet */
struct if_data if_data; /* not all members are meaningful */
struct ifnet *ifa_ifp; /* back-pointer to interface */
+ struct carp_softc *ifa_carp; /* pointer to CARP data */
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
(int, struct rtentry *, struct rt_addrinfo *);
@@ -807,20 +872,6 @@
#endif
/*
- * The prefix structure contains information about one prefix
- * of an interface. They are maintained by the different address families,
- * are allocated and attached when a prefix or an address is set,
- * and are linked together so all prefixes for an interface can be located.
- */
-struct ifprefix {
- struct sockaddr *ifpr_prefix; /* prefix of interface */
- struct ifnet *ifpr_ifp; /* back-pointer to interface */
- TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
- u_char ifpr_plen; /* prefix length in bits */
- u_char ifpr_type; /* protocol dependent prefix type */
-};
-
-/*
* Multicast address structure. This is analogous to the ifaddr
* structure except that it keeps track of multicast addresses.
*/
@@ -917,7 +968,6 @@
struct ifmultiaddr *
if_findmulti(struct ifnet *, struct sockaddr *);
void if_free(struct ifnet *);
-void if_free_type(struct ifnet *, u_char);
void if_initname(struct ifnet *, const char *, int);
void if_link_state_change(struct ifnet *, int);
int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
@@ -925,6 +975,7 @@
void if_ref(struct ifnet *);
void if_rele(struct ifnet *);
int if_setlladdr(struct ifnet *, const u_char *, int);
+int if_gethwaddr(struct ifnet *, struct ifreq *);
void if_up(struct ifnet *);
int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
int ifpromisc(struct ifnet *, int);
@@ -941,11 +992,13 @@
int ifa_ifwithaddr_check(struct sockaddr *);
struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
+struct ifaddr *ifa_ifwithdstaddr_fib(struct sockaddr *, int);
struct ifaddr *ifa_ifwithnet(struct sockaddr *, int);
+struct ifaddr *ifa_ifwithnet_fib(struct sockaddr *, int, int);
struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
-
struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
+int ifa_preferred(struct ifaddr *, struct ifaddr *);
int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
@@ -953,6 +1006,8 @@
typedef void if_com_free_t(void *com, u_char type);
void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
void if_deregister_com_alloc(u_char type);
+uint64_t if_get_counter_default(struct ifnet *, ift_counter);
+void if_inc_counter(struct ifnet *, ift_counter, int64_t);
#define IF_LLADDR(ifp) \
LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
@@ -965,6 +1020,10 @@
int ether_poll_deregister(struct ifnet *ifp);
#endif /* DEVICE_POLLING */
+/* TSO */
+void if_hw_tsomax_common(struct ifnet *, struct ifnet_hw_tsomax *);
+int if_hw_tsomax_update(struct ifnet *, struct ifnet_hw_tsomax *);
+
#endif /* _KERNEL */
#endif /* !_NET_IF_VAR_H_ */
Modified: trunk/sys/net/if_vlan.c
===================================================================
--- trunk/sys/net/if_vlan.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_vlan.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -40,7 +40,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/if_vlan.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/if_vlan.c 332160 2018-04-07 00:04:28Z brooks $");
#include "opt_inet.h"
#include "opt_vlan.h"
@@ -73,7 +73,6 @@
#include <netinet/if_ether.h>
#endif
-#define VLANNAME "vlan"
#define VLAN_DEF_HWIDTH 4
#define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST)
@@ -121,7 +120,7 @@
#endif
};
#define ifv_proto ifv_mib.ifvm_proto
-#define ifv_tag ifv_mib.ifvm_tag
+#define ifv_vid ifv_mib.ifvm_tag
#define ifv_encaplen ifv_mib.ifvm_encaplen
#define ifv_mtufudge ifv_mib.ifvm_mtufudge
#define ifv_mintu ifv_mib.ifvm_mintu
@@ -146,7 +145,8 @@
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
"pad short frames before tagging");
-static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");
+static const char vlanname[] = "vlan";
+static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
static eventhandler_tag ifdetach_tag;
static eventhandler_tag iflladdr_tag;
@@ -169,7 +169,7 @@
#define VLAN_LOCK_ASSERT() sx_assert(&ifv_lock, SA_LOCKED)
#define VLAN_LOCK() sx_xlock(&ifv_lock)
#define VLAN_UNLOCK() sx_xunlock(&ifv_lock)
-#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME)
+#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, vlanname)
#define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
#define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw)
#define TRUNK_UNLOCK(trunk) rw_wunlock(&(trunk)->rw)
@@ -185,7 +185,7 @@
static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
- uint16_t tag);
+ uint16_t vid);
#endif
static void trunk_destroy(struct ifvlantrunk *trunk);
@@ -205,7 +205,7 @@
static void vlan_capabilities(struct ifvlan *ifv);
static void vlan_trunk_capabilities(struct ifnet *ifp);
-static struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
+static struct ifnet *vlan_clone_match_ethervid(struct if_clone *,
const char *, int *);
static int vlan_clone_match(struct if_clone *, const char *);
static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
@@ -214,11 +214,10 @@
static void vlan_ifdetach(void *arg, struct ifnet *ifp);
static void vlan_iflladdr(void *arg, struct ifnet *ifp);
-static struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
- IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
+static struct if_clone *vlan_cloner;
#ifdef VIMAGE
-static VNET_DEFINE(struct if_clone, vlan_cloner);
+static VNET_DEFINE(struct if_clone *, vlan_cloner);
#define V_vlan_cloner VNET(vlan_cloner)
#endif
@@ -273,9 +272,9 @@
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
- if (ifv->ifv_tag == ifv2->ifv_tag)
+ if (ifv->ifv_vid == ifv2->ifv_vid)
return (EEXIST);
/*
@@ -285,7 +284,7 @@
*/
if (trunk->refcnt > (b * b) / 2) {
vlan_growhash(trunk, 1);
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
}
LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
trunk->refcnt++;
@@ -303,7 +302,7 @@
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
if (ifv2 == ifv) {
trunk->refcnt--;
@@ -355,7 +354,7 @@
for (i = 0; i < n; i++)
while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
LIST_REMOVE(ifv, ifv_list);
- j = HASH(ifv->ifv_tag, n2 - 1);
+ j = HASH(ifv->ifv_vid, n2 - 1);
LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
}
free(trunk->hash, M_VLAN);
@@ -369,14 +368,14 @@
}
static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
struct ifvlan *ifv;
TRUNK_LOCK_RASSERT(trunk);
- LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list)
- if (ifv->ifv_tag == tag)
+ LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
+ if (ifv->ifv_vid == vid)
return (ifv);
return (NULL);
}
@@ -400,10 +399,10 @@
#else
static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
- return trunk->vlans[tag];
+ return trunk->vlans[vid];
}
static __inline int
@@ -410,9 +409,9 @@
vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{
- if (trunk->vlans[ifv->ifv_tag] != NULL)
+ if (trunk->vlans[ifv->ifv_vid] != NULL)
return EEXIST;
- trunk->vlans[ifv->ifv_tag] = ifv;
+ trunk->vlans[ifv->ifv_vid] = ifv;
trunk->refcnt++;
return (0);
@@ -422,7 +421,7 @@
vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{
- trunk->vlans[ifv->ifv_tag] = NULL;
+ trunk->vlans[ifv->ifv_vid] = NULL;
trunk->refcnt--;
return (0);
@@ -460,23 +459,19 @@
* traffic that it doesn't really want, which ends up being discarded
* later by the upper protocol layers. Unfortunately, there's no way
* to avoid this: there really is only one physical interface.
- *
- * XXX: There is a possible race here if more than one thread is
- * modifying the multicast state of the vlan interface at the same time.
*/
static int
vlan_setmulti(struct ifnet *ifp)
{
struct ifnet *ifp_p;
- struct ifmultiaddr *ifma, *rifma = NULL;
+ struct ifmultiaddr *ifma;
struct ifvlan *sc;
struct vlan_mc_entry *mc;
int error;
- /*VLAN_LOCK_ASSERT();*/
-
/* Find the parent. */
sc = ifp->if_softc;
+ TRUNK_LOCK_ASSERT(TRUNK(sc));
ifp_p = PARENT(sc);
CURVNET_SET_QUIET(ifp_p->if_vnet);
@@ -483,25 +478,29 @@
/* First, remove any existing filter entries. */
while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
- error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
- if (error)
- return (error);
SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
+ (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
free(mc, M_VLAN);
}
/* Now program new ones. */
+ IF_ADDR_WLOCK(ifp);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
- if (mc == NULL)
+ if (mc == NULL) {
+ IF_ADDR_WUNLOCK(ifp);
return (ENOMEM);
+ }
bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
mc->mc_addr.sdl_index = ifp_p->if_index;
SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
+ }
+ IF_ADDR_WUNLOCK(ifp);
+ SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
- &rifma);
+ NULL);
if (error)
return (error);
}
@@ -624,10 +623,14 @@
}
/*
- * Return the 16bit vlan tag for this interface.
+ * Return the 12-bit VLAN VID for this interface, for use by external
+ * components such as Infiniband.
+ *
+ * XXXRW: Note that the function name here is historical; it should be named
+ * vlan_vid().
*/
static int
-vlan_tag(struct ifnet *ifp, uint16_t *tagp)
+vlan_tag(struct ifnet *ifp, uint16_t *vidp)
{
struct ifvlan *ifv;
@@ -634,7 +637,7 @@
if (ifp->if_type != IFT_L2VLAN)
return (EINVAL);
ifv = ifp->if_softc;
- *tagp = ifv->ifv_tag;
+ *vidp = ifv->ifv_vid;
return (0);
}
@@ -670,10 +673,10 @@
}
/*
- * Return the vlan device present at the specific tag.
+ * Return the vlan device present at the specific VID.
*/
static struct ifnet *
-vlan_devat(struct ifnet *ifp, uint16_t tag)
+vlan_devat(struct ifnet *ifp, uint16_t vid)
{
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
@@ -683,7 +686,7 @@
return (NULL);
ifp = NULL;
TRUNK_RLOCK(trunk);
- ifv = vlan_gethash(trunk, tag);
+ ifv = vlan_gethash(trunk, vid);
if (ifv)
ifp = ifv->ifv_ifp;
TRUNK_RUNLOCK(trunk);
@@ -726,7 +729,8 @@
vlan_tag_p = vlan_tag;
vlan_devat_p = vlan_devat;
#ifndef VIMAGE
- if_clone_attach(&vlan_cloner);
+ vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+ vlan_clone_create, vlan_clone_destroy);
#endif
if (bootverbose)
printf("vlan: initialized, using "
@@ -740,7 +744,7 @@
break;
case MOD_UNLOAD:
#ifndef VIMAGE
- if_clone_detach(&vlan_cloner);
+ if_clone_detach(vlan_cloner);
#endif
EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
@@ -749,8 +753,8 @@
vlan_trunk_cap_p = NULL;
vlan_trunkdev_p = NULL;
vlan_tag_p = NULL;
- vlan_cookie_p = vlan_cookie;
- vlan_setcookie_p = vlan_setcookie;
+ vlan_cookie_p = NULL;
+ vlan_setcookie_p = NULL;
vlan_devat_p = NULL;
VLAN_LOCK_DESTROY();
if (bootverbose)
@@ -776,8 +780,9 @@
vnet_vlan_init(const void *unused __unused)
{
+ vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+ vlan_clone_create, vlan_clone_destroy);
V_vlan_cloner = vlan_cloner;
- if_clone_attach(&V_vlan_cloner);
}
VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_vlan_init, NULL);
@@ -786,7 +791,7 @@
vnet_vlan_uninit(const void *unused __unused)
{
- if_clone_detach(&V_vlan_cloner);
+ if_clone_detach(V_vlan_cloner);
}
VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
vnet_vlan_uninit, NULL);
@@ -793,11 +798,11 @@
#endif
static struct ifnet *
-vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
+vlan_clone_match_ethervid(struct if_clone *ifc, const char *name, int *vidp)
{
const char *cp;
struct ifnet *ifp;
- int t;
+ int vid;
/* Check for <etherif>.<vlan> style interface names. */
IFNET_RLOCK_NOSLEEP();
@@ -816,13 +821,13 @@
continue;
if (*cp == '\0')
continue;
- t = 0;
+ vid = 0;
for(; *cp >= '0' && *cp <= '9'; cp++)
- t = (t * 10) + (*cp - '0');
+ vid = (vid * 10) + (*cp - '0');
if (*cp != '\0')
continue;
- if (tag != NULL)
- *tag = t;
+ if (vidp != NULL)
+ *vidp = vid;
break;
}
IFNET_RUNLOCK_NOSLEEP();
@@ -835,10 +840,10 @@
{
const char *cp;
- if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL)
+ if (vlan_clone_match_ethervid(ifc, name, NULL) != NULL)
return (1);
- if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0)
+ if (strncmp(vlanname, name, strlen(vlanname)) != 0)
return (0);
for (cp = name + 4; *cp != '\0'; cp++) {
if (*cp < '0' || *cp > '9')
@@ -855,7 +860,7 @@
int wildcard;
int unit;
int error;
- int tag;
+ int vid;
int ethertag;
struct ifvlan *ifv;
struct ifnet *ifp;
@@ -873,6 +878,9 @@
* must be configured separately.
* The first technique is preferred; the latter two are
* supported for backwards compatibilty.
+ *
+ * XXXRW: Note historic use of the word "tag" here. New ioctls may be
+ * called for.
*/
if (params) {
error = copyin(params, &vlr, sizeof(vlr));
@@ -882,7 +890,7 @@
if (p == NULL)
return ENXIO;
/*
- * Don't let the caller set up a VLAN tag with
+ * Don't let the caller set up a VLAN VID with
* anything except VLID bits.
*/
if (vlr.vlr_tag & ~EVL_VLID_MASK)
@@ -892,18 +900,18 @@
return (error);
ethertag = 1;
- tag = vlr.vlr_tag;
+ vid = vlr.vlr_tag;
wildcard = (unit < 0);
- } else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
+ } else if ((p = vlan_clone_match_ethervid(ifc, name, &vid)) != NULL) {
ethertag = 1;
unit = -1;
wildcard = 0;
/*
- * Don't let the caller set up a VLAN tag with
+ * Don't let the caller set up a VLAN VID with
* anything except VLID bits.
*/
- if (tag & ~EVL_VLID_MASK)
+ if (vid & ~EVL_VLID_MASK)
return (EINVAL);
} else {
ethertag = 0;
@@ -943,7 +951,7 @@
* we don't conform to the default naming convention for interfaces.
*/
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = vlanname;
ifp->if_dunit = unit;
/* NB: flags are not set here */
ifp->if_linkmib = &ifv->ifv_mib;
@@ -965,7 +973,7 @@
sdl->sdl_type = IFT_L2VLAN;
if (ethertag) {
- error = vlan_config(ifv, p, tag);
+ error = vlan_config(ifv, p, vid);
if (error != 0) {
/*
* Since we've partially failed, we need to back
@@ -974,7 +982,7 @@
*/
ether_ifdetach(ifp);
vlan_unconfig(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
ifc_free_unit(ifc, unit);
free(ifv, M_VLAN);
@@ -996,7 +1004,7 @@
ether_ifdetach(ifp); /* first, remove it from system-wide lists */
vlan_unconfig(ifp); /* now it can be unconfigured and freed */
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
free(ifv, M_VLAN);
ifc_free_unit(ifc, unit);
@@ -1035,7 +1043,7 @@
if (!UP_AND_RUNNING(p)) {
m_freem(m);
ifp->if_oerrors++;
- return (0);
+ return (ENETDOWN);
}
/*
@@ -1075,10 +1083,10 @@
* packet tag that holds it.
*/
if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
- m->m_pkthdr.ether_vtag = ifv->ifv_tag;
+ m->m_pkthdr.ether_vtag = ifv->ifv_vid;
m->m_flags |= M_VLANTAG;
} else {
- m = ether_vlanencap(m, ifv->ifv_tag);
+ m = ether_vlanencap(m, ifv->ifv_vid);
if (m == NULL) {
if_printf(ifp, "unable to prepend VLAN header\n");
ifp->if_oerrors++;
@@ -1112,7 +1120,7 @@
{
struct ifvlantrunk *trunk = ifp->if_vlantrunk;
struct ifvlan *ifv;
- uint16_t tag;
+ uint16_t vid;
KASSERT(trunk != NULL, ("%s: no trunk", __func__));
@@ -1121,7 +1129,7 @@
* Packet is tagged, but m contains a normal
* Ethernet frame; the tag is stored out-of-band.
*/
- tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
+ vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
m->m_flags &= ~M_VLANTAG;
} else {
struct ether_vlan_header *evl;
@@ -1137,7 +1145,7 @@
return;
}
evl = mtod(m, struct ether_vlan_header *);
- tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
+ vid = EVL_VLANOFTAG(ntohs(evl->evl_tag));
/*
* Remove the 802.1q header by copying the Ethernet
@@ -1162,7 +1170,7 @@
}
TRUNK_RLOCK(trunk);
- ifv = vlan_gethash(trunk, tag);
+ ifv = vlan_gethash(trunk, vid);
if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
TRUNK_RUNLOCK(trunk);
m_freem(m);
@@ -1179,7 +1187,7 @@
}
static int
-vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
+vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
{
struct ifvlantrunk *trunk;
struct ifnet *ifp;
@@ -1186,7 +1194,7 @@
int error = 0;
/* VID numbers 0x0 and 0xFFF are reserved */
- if (tag == 0 || tag == 0xFFF)
+ if (vid == 0 || vid == 0xFFF)
return (EINVAL);
if (p->if_type != IFT_ETHER &&
(p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
@@ -1218,7 +1226,7 @@
TRUNK_LOCK(trunk);
}
- ifv->ifv_tag = tag; /* must set this before vlan_inshash() */
+ ifv->ifv_vid = vid; /* must set this before vlan_inshash() */
error = vlan_inshash(trunk, ifv);
if (error)
goto done;
@@ -1296,7 +1304,7 @@
done:
TRUNK_UNLOCK(trunk);
if (error == 0)
- EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag);
+ EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
VLAN_UNLOCK();
return (error);
@@ -1365,7 +1373,7 @@
* Check if we were the last.
*/
if (trunk->refcnt == 0) {
- trunk->parent->if_vlantrunk = NULL;
+ parent->if_vlantrunk = NULL;
/*
* XXXGL: If some ithread has already entered
* vlan_input() and is now blocked on the trunk
@@ -1392,7 +1400,7 @@
* to cleanup anyway.
*/
if (parent != NULL)
- EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag);
+ EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
}
/* Handle a reference counted flag that should be set on the parent as well */
@@ -1476,6 +1484,7 @@
{
struct ifnet *p = PARENT(ifv);
struct ifnet *ifp = ifv->ifv_ifp;
+ struct ifnet_hw_tsomax hw_tsomax;
TRUNK_LOCK_ASSERT(TRUNK(ifv));
@@ -1486,13 +1495,16 @@
* offloading requires hardware VLAN tagging.
*/
if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
- ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
+ ifp->if_capabilities =
+ p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
p->if_capenable & IFCAP_VLAN_HWTAGGING) {
- ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
+ ifp->if_capenable =
+ p->if_capenable & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
- CSUM_UDP | CSUM_SCTP | CSUM_IP_FRAGS | CSUM_FRAGMENT);
+ CSUM_UDP | CSUM_SCTP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
+ CSUM_SCTP_IPV6);
} else {
ifp->if_capenable = 0;
ifp->if_hwassist = 0;
@@ -1502,6 +1514,9 @@
* propagate the hardware-assisted flag. TSO on VLANs
* does not necessarily require hardware VLAN tagging.
*/
+ memset(&hw_tsomax, 0, sizeof(hw_tsomax));
+ if_hw_tsomax_common(p, &hw_tsomax);
+ if_hw_tsomax_update(ifp, &hw_tsomax);
if (p->if_capabilities & IFCAP_VLAN_HWTSO)
ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
if (p->if_capenable & IFCAP_VLAN_HWTSO) {
@@ -1511,6 +1526,22 @@
ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
}
+
+ /*
+ * If the parent interface can offload TCP connections over VLANs then
+ * propagate its TOE capability to the VLAN interface.
+ *
+ * All TOE drivers in the tree today can deal with VLANs. If this
+ * changes then IFCAP_VLAN_TOE should be promoted to a full capability
+ * with its own bit.
+ */
+#define IFCAP_VLAN_TOE IFCAP_TOE
+ if (p->if_capabilities & IFCAP_VLAN_TOE)
+ ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
+ if (p->if_capenable & IFCAP_VLAN_TOE) {
+ TOEDEV(ifp) = TOEDEV(p);
+ ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
+ }
}
static void
@@ -1541,6 +1572,7 @@
struct ifreq *ifr;
struct ifaddr *ifa;
struct ifvlan *ifv;
+ struct ifvlantrunk *trunk;
struct vlanreq vlr;
int error = 0;
@@ -1557,12 +1589,8 @@
#endif
break;
case SIOCGIFADDR:
- {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&ifr->ifr_data;
- bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
- }
+ bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
+ ifp->if_addrlen);
break;
case SIOCGIFMEDIA:
VLAN_LOCK();
@@ -1612,6 +1640,13 @@
case SIOCSETVLAN:
#ifdef VIMAGE
+ /*
+ * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
+ * interface to be delegated to a jail without allowing the
+ * jail to change what underlying interface/VID it is
+ * associated with. We are not entirely convinced that this
+ * is the right way to accomplish that policy goal.
+ */
if (ifp->if_vnet != ifp->if_home_vnet) {
error = EPERM;
break;
@@ -1630,7 +1665,7 @@
break;
}
/*
- * Don't let the caller set up a VLAN tag with
+ * Don't let the caller set up a VLAN VID with
* anything except VLID bits.
*/
if (vlr.vlr_tag & ~EVL_VLID_MASK) {
@@ -1657,7 +1692,7 @@
if (TRUNK(ifv) != NULL) {
strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
sizeof(vlr.vlr_parent));
- vlr.vlr_tag = ifv->ifv_tag;
+ vlr.vlr_tag = ifv->ifv_vid;
}
VLAN_UNLOCK();
error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
@@ -1678,8 +1713,12 @@
* If we don't have a parent, just remember the membership for
* when we do.
*/
- if (TRUNK(ifv) != NULL)
+ trunk = TRUNK(ifv);
+ if (trunk != NULL) {
+ TRUNK_LOCK(trunk);
error = vlan_setmulti(ifp);
+ TRUNK_UNLOCK(trunk);
+ }
break;
default:
Modified: trunk/sys/net/if_vlan_var.h
===================================================================
--- trunk/sys/net/if_vlan_var.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/if_vlan_var.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -27,7 +27,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/if_vlan_var.h 219819 2011-03-21 09:40:01Z jeff $
+ * $FreeBSD: stable/10/sys/net/if_vlan_var.h 326512 2017-12-04 09:27:36Z hselasky $
*/
#ifndef _NET_IF_VLAN_VAR_H_
@@ -109,7 +109,7 @@
* received VLAN tag (containing both vlan and priority information)
* into the ether_vtag mbuf packet header field:
*
- * m->m_pkthdr.ether_vtag = vlan_id; // ntohs()?
+ * m->m_pkthdr.ether_vtag = vtag; // ntohs()?
* m->m_flags |= M_VLANTAG;
*
* to mark the packet m with the specified VLAN tag.
@@ -133,16 +133,16 @@
} while (0)
#define VLAN_TRUNKDEV(_ifp) \
- (_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL
-#define VLAN_TAG(_ifp, _tag) \
- (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_tag)) : EINVAL
+ ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL)
+#define VLAN_TAG(_ifp, _vid) \
+ ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL)
#define VLAN_COOKIE(_ifp) \
- (_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL
+ ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL)
#define VLAN_SETCOOKIE(_ifp, _cookie) \
- (_ifp)->if_type == IFT_L2VLAN ? \
- (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL
-#define VLAN_DEVAT(_ifp, _tag) \
- (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_tag)) : NULL
+ ((_ifp)->if_type == IFT_L2VLAN ? \
+ (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL)
+#define VLAN_DEVAT(_ifp, _vid) \
+ ((_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_vid)) : NULL)
extern void (*vlan_trunk_cap_p)(struct ifnet *);
extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
Added: trunk/sys/net/if_vxlan.c
===================================================================
--- trunk/sys/net/if_vxlan.c (rev 0)
+++ trunk/sys/net/if_vxlan.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,3090 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014, Bryan Venteicher <bryanv at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/net/if_vxlan.c 327142 2017-12-24 02:06:16Z ae $");
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/refcount.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_vxlan.h>
+#include <net/netisr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+struct vxlan_softc;
+LIST_HEAD(vxlan_softc_head, vxlan_softc);
+
+struct vxlan_socket_mc_info {
+ union vxlan_sockaddr vxlsomc_saddr;
+ union vxlan_sockaddr vxlsomc_gaddr;
+ int vxlsomc_ifidx;
+ int vxlsomc_users;
+};
+
+#define VXLAN_SO_MC_MAX_GROUPS 32
+
+#define VXLAN_SO_VNI_HASH_SHIFT 6
+#define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT)
+#define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE)
+
+struct vxlan_socket {
+ struct socket *vxlso_sock;
+ struct rmlock vxlso_lock;
+ u_int vxlso_refcnt;
+ union vxlan_sockaddr vxlso_laddr;
+ LIST_ENTRY(vxlan_socket) vxlso_entry;
+ struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
+ struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
+};
+
+#define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p))
+#define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p))
+#define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock)
+#define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock)
+#define VXLAN_SO_LOCK_ASSERT(_vso) \
+ rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
+#define VXLAN_SO_LOCK_WASSERT(_vso) \
+ rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
+
+#define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt)
+#define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt)
+
+struct vxlan_ftable_entry {
+ LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash;
+ uint16_t vxlfe_flags;
+ uint8_t vxlfe_mac[ETHER_ADDR_LEN];
+ union vxlan_sockaddr vxlfe_raddr;
+ time_t vxlfe_expire;
+};
+
+#define VXLAN_FE_FLAG_DYNAMIC 0x01
+#define VXLAN_FE_FLAG_STATIC 0x02
+
+#define VXLAN_FE_IS_DYNAMIC(_fe) \
+ ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
+
+#define VXLAN_SC_FTABLE_SHIFT 9
+#define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT)
+#define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1)
+#define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
+ (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
+
+LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
+
+struct vxlan_statistics {
+ uint32_t ftable_nospace;
+ uint32_t ftable_lock_upgrade_failed;
+};
+
+struct vxlan_softc {
+ struct ifnet *vxl_ifp;
+ struct vxlan_socket *vxl_sock;
+ uint32_t vxl_vni;
+ union vxlan_sockaddr vxl_src_addr;
+ union vxlan_sockaddr vxl_dst_addr;
+ uint32_t vxl_flags;
+#define VXLAN_FLAG_INIT 0x0001
+#define VXLAN_FLAG_TEARDOWN 0x0002
+#define VXLAN_FLAG_LEARN 0x0004
+
+ uint32_t vxl_port_hash_key;
+ uint16_t vxl_min_port;
+ uint16_t vxl_max_port;
+ uint8_t vxl_ttl;
+
+ /* Lookup table from MAC address to forwarding entry. */
+ uint32_t vxl_ftable_cnt;
+ uint32_t vxl_ftable_max;
+ uint32_t vxl_ftable_timeout;
+ uint32_t vxl_ftable_hash_key;
+ struct vxlan_ftable_head *vxl_ftable;
+
+ /* Derived from vxl_dst_addr. */
+ struct vxlan_ftable_entry vxl_default_fe;
+
+ struct ip_moptions *vxl_im4o;
+ struct ip6_moptions *vxl_im6o;
+
+ struct rmlock vxl_lock;
+ volatile u_int vxl_refcnt;
+
+ int vxl_unit;
+ int vxl_vso_mc_index;
+ struct vxlan_statistics vxl_stats;
+ struct sysctl_oid *vxl_sysctl_node;
+ struct sysctl_ctx_list vxl_sysctl_ctx;
+ struct callout vxl_callout;
+ uint8_t vxl_hwaddr[ETHER_ADDR_LEN];
+ int vxl_mc_ifindex;
+ struct ifnet *vxl_mc_ifp;
+ char vxl_mc_ifname[IFNAMSIZ];
+ LIST_ENTRY(vxlan_softc) vxl_entry;
+ LIST_ENTRY(vxlan_softc) vxl_ifdetach_list;
+};
+
+#define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p))
+#define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p))
+#define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock)
+#define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock)
+#define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock)
+#define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
+#define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
+#define VXLAN_UNLOCK(_sc, _p) do { \
+ if (VXLAN_LOCK_WOWNED(_sc)) \
+ VXLAN_WUNLOCK(_sc); \
+ else \
+ VXLAN_RUNLOCK(_sc, _p); \
+} while (0)
+
+#define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt)
+#define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt)
+
+#define satoconstsin(sa) ((const struct sockaddr_in *)(sa))
+#define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa))
+
+struct vxlanudphdr {
+ struct udphdr vxlh_udp;
+ struct vxlan_header vxlh_hdr;
+} __packed;
+
+static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
+static void vxlan_ftable_init(struct vxlan_softc *);
+static void vxlan_ftable_fini(struct vxlan_softc *);
+static void vxlan_ftable_flush(struct vxlan_softc *, int);
+static void vxlan_ftable_expire(struct vxlan_softc *);
+static int vxlan_ftable_update_locked(struct vxlan_softc *,
+ const struct sockaddr *, const uint8_t *,
+ struct rm_priotracker *);
+static int vxlan_ftable_update(struct vxlan_softc *,
+ const struct sockaddr *, const uint8_t *);
+static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
+
+static struct vxlan_ftable_entry *
+ vxlan_ftable_entry_alloc(void);
+static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
+static void vxlan_ftable_entry_init(struct vxlan_softc *,
+ struct vxlan_ftable_entry *, const uint8_t *,
+ const struct sockaddr *, uint32_t);
+static void vxlan_ftable_entry_destroy(struct vxlan_softc *,
+ struct vxlan_ftable_entry *);
+static int vxlan_ftable_entry_insert(struct vxlan_softc *,
+ struct vxlan_ftable_entry *);
+static struct vxlan_ftable_entry *
+ vxlan_ftable_entry_lookup(struct vxlan_softc *,
+ const uint8_t *);
+static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
+ struct sbuf *);
+
+static struct vxlan_socket *
+ vxlan_socket_alloc(const union vxlan_sockaddr *);
+static void vxlan_socket_destroy(struct vxlan_socket *);
+static void vxlan_socket_release(struct vxlan_socket *);
+static struct vxlan_socket *
+ vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
+static void vxlan_socket_insert(struct vxlan_socket *);
+static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
+static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
+static int vxlan_socket_create(struct ifnet *, int,
+ const union vxlan_sockaddr *, struct vxlan_socket **);
+static void vxlan_socket_ifdetach(struct vxlan_socket *,
+ struct ifnet *, struct vxlan_softc_head *);
+
+static struct vxlan_socket *
+ vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
+/* Multicast group membership tracking on a shared vxlan socket. */
+static int vxlan_sockaddr_mc_info_match(
+ const struct vxlan_socket_mc_info *,
+ const union vxlan_sockaddr *,
+ const union vxlan_sockaddr *, int);
+static int vxlan_socket_mc_join_group(struct vxlan_socket *,
+ const union vxlan_sockaddr *, const union vxlan_sockaddr *,
+ int *, union vxlan_sockaddr *);
+static int vxlan_socket_mc_leave_group(struct vxlan_socket *,
+ const union vxlan_sockaddr *,
+ const union vxlan_sockaddr *, int);
+static int vxlan_socket_mc_add_group(struct vxlan_socket *,
+ const union vxlan_sockaddr *, const union vxlan_sockaddr *,
+ int, int *);
+static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
+ int);
+
+/* VNI hash of softcs attached to a shared socket. */
+static struct vxlan_softc *
+ vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
+ uint32_t);
+static struct vxlan_softc *
+ vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
+static int vxlan_socket_insert_softc(struct vxlan_socket *,
+ struct vxlan_softc *);
+static void vxlan_socket_remove_softc(struct vxlan_socket *,
+ struct vxlan_softc *);
+
+/* Multicast interface selection and setup. */
+static struct ifnet *
+ vxlan_multicast_if_ref(struct vxlan_softc *, int);
+static void vxlan_free_multicast(struct vxlan_softc *);
+static int vxlan_setup_multicast_interface(struct vxlan_softc *);
+
+/* Interface lifecycle: init, teardown, and periodic timer. */
+static int vxlan_setup_multicast(struct vxlan_softc *);
+static int vxlan_setup_socket(struct vxlan_softc *);
+static void vxlan_setup_interface(struct vxlan_softc *);
+static int vxlan_valid_init_config(struct vxlan_softc *);
+static void vxlan_init_wait(struct vxlan_softc *);
+static void vxlan_init_complete(struct vxlan_softc *);
+static void vxlan_init(void *);
+static void vxlan_release(struct vxlan_softc *);
+static void vxlan_teardown_wait(struct vxlan_softc *);
+static void vxlan_teardown_complete(struct vxlan_softc *);
+static void vxlan_teardown_locked(struct vxlan_softc *);
+static void vxlan_teardown(struct vxlan_softc *);
+static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
+ struct vxlan_softc_head *);
+static void vxlan_timer(void *);
+
+/* SIOCGDRVSPEC/SIOCSDRVSPEC control-command handlers (see table below). */
+static int vxlan_ctrl_get_config(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
+static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
+static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
+static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
+static int vxlan_ctrl_flush(struct vxlan_softc *, void *);
+static int vxlan_ioctl_drvspec(struct vxlan_softc *,
+ struct ifdrv *, int);
+static int vxlan_ioctl_ifflags(struct vxlan_softc *);
+static int vxlan_ioctl(struct ifnet *, u_long, caddr_t);
+
+/* Datapath: encapsulation, transmit, and decapsulated input. */
+#if defined(INET) || defined(INET6)
+static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
+static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
+ int, uint16_t, uint16_t);
+#endif
+static int vxlan_encap4(struct vxlan_softc *,
+ const union vxlan_sockaddr *, struct mbuf *);
+static int vxlan_encap6(struct vxlan_softc *,
+ const union vxlan_sockaddr *, struct mbuf *);
+static int vxlan_transmit(struct ifnet *, struct mbuf *);
+static void vxlan_qflush(struct ifnet *);
+static void vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
+ const struct sockaddr *, void *);
+static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
+ const struct sockaddr *);
+
+/* Cloner create/destroy. */
+static void vxlan_set_default_config(struct vxlan_softc *);
+static int vxlan_set_user_config(struct vxlan_softc *,
+ struct ifvxlanparam *);
+static int vxlan_clone_create(struct if_clone *, int, caddr_t);
+static void vxlan_clone_destroy(struct ifnet *);
+
+static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
+static void vxlan_fakeaddr(struct vxlan_softc *);
+
+/* sockaddr helpers operating on the vxlan_sockaddr union. */
+static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
+ const struct sockaddr *);
+static void vxlan_sockaddr_copy(union vxlan_sockaddr *,
+ const struct sockaddr *);
+static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
+ const struct sockaddr *);
+static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
+ const struct sockaddr *);
+static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
+static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
+static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
+
+/* Configuration validation helpers. */
+static int vxlan_can_change_config(struct vxlan_softc *);
+static int vxlan_check_vni(uint32_t);
+static int vxlan_check_ttl(int);
+static int vxlan_check_ftable_timeout(uint32_t);
+static int vxlan_check_ftable_max(uint32_t);
+
+static void vxlan_sysctl_setup(struct vxlan_softc *);
+static void vxlan_sysctl_destroy(struct vxlan_softc *);
+static int vxlan_tunable_int(struct vxlan_softc *, const char *, int);
+
+/* Module load/unload plumbing. */
+static void vxlan_ifdetach_event(void *, struct ifnet *);
+static void vxlan_load(void);
+static void vxlan_unload(void);
+static int vxlan_modevent(module_t, int, void *);
+
+/* Module-wide state, protected by vxlan_list_mtx where noted. */
+static const char vxlan_name[] = "vxlan";
+static MALLOC_DEFINE(M_VXLAN, vxlan_name,
+ "Virtual eXtensible LAN Interface");
+static struct if_clone *vxlan_cloner;
+/* vxlan_list_mtx guards vxlan_socket_list and socket refcount release. */
+static struct mtx vxlan_list_mtx;
+static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
+
+static eventhandler_tag vxlan_ifdetach_event_tag;
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0,
+ "Virtual eXtensible Local Area Network");
+
+/* Loader tunables; read once at module load. */
+static int vxlan_legacy_port = 0;
+TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
+static int vxlan_reuse_port = 0;
+TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
+
+/* Default maximum number of addresses in the forwarding table. */
+#ifndef VXLAN_FTABLE_MAX
+#define VXLAN_FTABLE_MAX 2000
+#endif
+
+/* Timeout (in seconds) of addresses learned in the forwarding table. */
+#ifndef VXLAN_FTABLE_TIMEOUT
+#define VXLAN_FTABLE_TIMEOUT (20 * 60)
+#endif
+
+/*
+ * Maximum timeout (in seconds) of addresses learned in the forwarding
+ * table.
+ */
+#ifndef VXLAN_FTABLE_MAX_TIMEOUT
+#define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24)
+#endif
+
+/* Number of seconds between pruning attempts of the forwarding table. */
+#ifndef VXLAN_FTABLE_PRUNE
+#define VXLAN_FTABLE_PRUNE (5 * 60)
+#endif
+
+static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
+
+/*
+ * Descriptor for one driver-specific ioctl command: handler, size of
+ * the argument to copy in/out, and privilege/copy direction flags.
+ */
+struct vxlan_control {
+ int (*vxlc_func)(struct vxlan_softc *, void *);
+ int vxlc_argsize;
+ int vxlc_flags;
+#define VXLAN_CTRL_FLAG_COPYIN 0x01
+#define VXLAN_CTRL_FLAG_COPYOUT 0x02
+#define VXLAN_CTRL_FLAG_SUSER 0x04
+};
+
+/*
+ * Dispatch table for vxlan_ioctl_drvspec(), indexed by the VXLAN_CMD_*
+ * value supplied in the ifdrv request.  All mutating commands require
+ * superuser privilege and copy their argument in from userland.
+ */
+static const struct vxlan_control vxlan_control_table[] = {
+ [VXLAN_CMD_GET_CONFIG] =
+ { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
+ VXLAN_CTRL_FLAG_COPYOUT
+ },
+
+ [VXLAN_CMD_SET_VNI] =
+ { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_LOCAL_ADDR] =
+ { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_REMOTE_ADDR] =
+ { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_LOCAL_PORT] =
+ { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_REMOTE_PORT] =
+ { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_PORT_RANGE] =
+ { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_FTABLE_TIMEOUT] =
+ { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_FTABLE_MAX] =
+ { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_MULTICAST_IF] =
+ { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_TTL] =
+ { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_LEARN] =
+ { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_FTABLE_ENTRY_ADD] =
+ { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_FTABLE_ENTRY_REM] =
+ { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_FLUSH] =
+ { vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+};
+
+static const int vxlan_control_table_size = nitems(vxlan_control_table);
+
+/*
+ * Lexicographically compare two Ethernet MAC addresses, byte by byte.
+ * Returns a negative, zero, or positive value, like memcmp(3).
+ */
+static int
+vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+ int idx;
+
+ for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
+ if (a[idx] != b[idx])
+ return (((int)a[idx]) - ((int)b[idx]));
+ }
+
+ return (0);
+}
+
+/*
+ * Allocate and initialize the softc's forwarding-table hash buckets.
+ * Called during interface creation; may sleep (M_WAITOK).
+ */
+static void
+vxlan_ftable_init(struct vxlan_softc *sc)
+{
+ int i;
+
+ sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
+ VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
+ LIST_INIT(&sc->vxl_ftable[i]);
+ /* Random key makes the bucket hash unpredictable to remote hosts. */
+ sc->vxl_ftable_hash_key = arc4random();
+}
+
+/*
+ * Release the forwarding-table buckets.  All entries must already have
+ * been destroyed (e.g. via vxlan_ftable_flush()); asserted below.
+ */
+static void
+vxlan_ftable_fini(struct vxlan_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
+ ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
+ }
+ MPASS(sc->vxl_ftable_cnt == 0);
+
+ free(sc->vxl_ftable, M_VXLAN);
+ sc->vxl_ftable = NULL;
+}
+
+/*
+ * Remove forwarding entries: all of them when 'all' is nonzero,
+ * otherwise only dynamically learned ones (static entries are kept).
+ */
+static void
+vxlan_ftable_flush(struct vxlan_softc *sc, int all)
+{
+ struct vxlan_ftable_entry *fe, *tfe;
+ int i;
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
+ if (all || VXLAN_FE_IS_DYNAMIC(fe))
+ vxlan_ftable_entry_destroy(sc, fe);
+ }
+ }
+}
+
+/*
+ * Expire dynamically learned forwarding entries whose timeout has
+ * elapsed.  Runs from the periodic timer with the write lock held.
+ */
+static void
+vxlan_ftable_expire(struct vxlan_softc *sc)
+{
+ struct vxlan_ftable_entry *fe, *tfe;
+ int i;
+
+ VXLAN_LOCK_WASSERT(sc);
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
+ if (VXLAN_FE_IS_DYNAMIC(fe) &&
+ time_uptime >= fe->vxlfe_expire)
+ vxlan_ftable_entry_destroy(sc, fe);
+ }
+ }
+}
+
+/*
+ * Learn (or refresh) the forwarding entry mapping 'mac' to the remote
+ * tunnel endpoint 'sa'.  Entered with at least the read lock held; if
+ * a modification is needed the lock is dropped and retaken as a write
+ * lock, and the lookup restarts since the table may have changed.
+ * Returns 0 on success, ENOSPC when the table is full, or ENOMEM.
+ */
+static int
+vxlan_ftable_update_locked(struct vxlan_softc *sc, const struct sockaddr *sa,
+ const uint8_t *mac, struct rm_priotracker *tracker)
+{
+ union vxlan_sockaddr vxlsa;
+ struct vxlan_ftable_entry *fe;
+ int error;
+
+ VXLAN_LOCK_ASSERT(sc);
+
+again:
+ /*
+ * A forwarding entry for this MAC address might already exist. If
+ * so, update it, otherwise create a new one. We may have to upgrade
+ * the lock if we have to change or create an entry.
+ */
+ fe = vxlan_ftable_entry_lookup(sc, mac);
+ if (fe != NULL) {
+ fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
+
+ /* Static entries and unchanged endpoints need no rewrite. */
+ if (!VXLAN_FE_IS_DYNAMIC(fe) ||
+ vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, sa))
+ return (0);
+ if (!VXLAN_LOCK_WOWNED(sc)) {
+ VXLAN_RUNLOCK(sc, tracker);
+ VXLAN_WLOCK(sc);
+ sc->vxl_stats.ftable_lock_upgrade_failed++;
+ goto again;
+ }
+ vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, sa);
+ return (0);
+ }
+
+ /* Creating an entry requires the write lock. */
+ if (!VXLAN_LOCK_WOWNED(sc)) {
+ VXLAN_RUNLOCK(sc, tracker);
+ VXLAN_WLOCK(sc);
+ sc->vxl_stats.ftable_lock_upgrade_failed++;
+ goto again;
+ }
+
+ if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
+ sc->vxl_stats.ftable_nospace++;
+ return (ENOSPC);
+ }
+
+ fe = vxlan_ftable_entry_alloc();
+ if (fe == NULL)
+ return (ENOMEM);
+
+ /*
+ * The source port may be randomly selected by the remote host, so
+ * use the port of the default destination address.
+ */
+ vxlan_sockaddr_copy(&vxlsa, sa);
+ vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
+
+ vxlan_ftable_entry_init(sc, fe, mac, &vxlsa.sa,
+ VXLAN_FE_FLAG_DYNAMIC);
+
+ /* The prior lookup failed, so the insert should not. */
+ error = vxlan_ftable_entry_insert(sc, fe);
+ MPASS(error == 0);
+
+ return (0);
+}
+
+/*
+ * Unlocked wrapper around vxlan_ftable_update_locked().  Takes the
+ * read lock; the locked path may upgrade it, so VXLAN_UNLOCK must
+ * release whichever lock is actually held on return.
+ */
+static int
+vxlan_ftable_update(struct vxlan_softc *sc, const struct sockaddr *sa,
+ const uint8_t *mac)
+{
+ struct rm_priotracker tracker;
+ int error;
+
+ VXLAN_RLOCK(sc, &tracker);
+ error = vxlan_ftable_update_locked(sc, sa, mac, &tracker);
+ VXLAN_UNLOCK(sc, &tracker);
+
+ return (error);
+}
+
+/*
+ * Sysctl handler that dumps the forwarding table as text, one entry
+ * per line, into a fixed-size sbuf.
+ */
+static int
+vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
+{
+ struct rm_priotracker tracker;
+ struct sbuf sb;
+ struct vxlan_softc *sc;
+ struct vxlan_ftable_entry *fe;
+ size_t size;
+ int i, error;
+
+ /*
+ * This is mostly intended for debugging during development. It is
+ * not practical to dump an entire large table this way.
+ */
+
+ sc = arg1;
+ size = PAGE_SIZE; /* Calculate later. */
+
+ /*
+ * NOTE(review): sbuf_new() with a NULL buffer allocates internally
+ * and its return value is not checked here — presumably relies on
+ * it succeeding for a fixed-length sbuf; confirm against sbuf(9).
+ */
+ sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
+ sbuf_putc(&sb, '\n');
+
+ VXLAN_RLOCK(sc, &tracker);
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
+ /* Stop once the sbuf has overflowed. */
+ if (sbuf_error(&sb) != 0)
+ break;
+ vxlan_ftable_entry_dump(fe, &sb);
+ }
+ }
+ VXLAN_RUNLOCK(sc, &tracker);
+
+ /* Only the leading newline was written: present an empty string. */
+ if (sbuf_len(&sb) == 1)
+ sbuf_setpos(&sb, 0);
+
+ sbuf_finish(&sb);
+ error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+ sbuf_delete(&sb);
+
+ return (error);
+}
+
+/*
+ * Allocate a zeroed forwarding-table entry.  Uses M_NOWAIT because
+ * callers run under the softc lock, so this may return NULL.
+ */
+static struct vxlan_ftable_entry *
+vxlan_ftable_entry_alloc(void)
+{
+
+ return (malloc(sizeof(struct vxlan_ftable_entry), M_VXLAN,
+ M_ZERO | M_NOWAIT));
+}
+
+/* Free a forwarding-table entry previously unlinked from its bucket. */
+static void
+vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
+{
+
+ free(fe, M_VXLAN);
+}
+
+/*
+ * Populate a forwarding entry with its MAC address, remote endpoint,
+ * flags, and a fresh expiration time.  Does not insert it anywhere.
+ */
+static void
+vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
+ const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
+{
+
+ fe->vxlfe_flags = flags;
+ fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
+ memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
+ vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
+}
+
+/*
+ * Unlink a forwarding entry from its hash bucket, update the count,
+ * and free it.  Callers hold the write lock.
+ */
+static void
+vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
+ struct vxlan_ftable_entry *fe)
+{
+
+ sc->vxl_ftable_cnt--;
+ LIST_REMOVE(fe, vxlfe_hash);
+ vxlan_ftable_entry_free(fe);
+}
+
+/*
+ * Insert a forwarding entry into its hash bucket, keeping the bucket
+ * list sorted in descending MAC order so lookups can exit early.
+ * Returns EEXIST if an entry with the same MAC is already present.
+ */
+static int
+vxlan_ftable_entry_insert(struct vxlan_softc *sc,
+ struct vxlan_ftable_entry *fe)
+{
+ struct vxlan_ftable_entry *lfe;
+ uint32_t hash;
+ int dir;
+
+ VXLAN_LOCK_WASSERT(sc);
+ hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
+
+ lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
+ if (lfe == NULL) {
+ LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
+ goto out;
+ }
+
+ /* Walk the sorted bucket to find the insertion point. */
+ do {
+ dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
+ if (dir == 0)
+ return (EEXIST);
+ if (dir > 0) {
+ LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
+ goto out;
+ } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
+ /* Smallest MAC so far: append at the tail. */
+ LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
+ goto out;
+ } else
+ lfe = LIST_NEXT(lfe, vxlfe_hash);
+ } while (lfe != NULL);
+
+out:
+ sc->vxl_ftable_cnt++;
+
+ return (0);
+}
+
+/*
+ * Find the forwarding entry for 'mac', or NULL.  Relies on the bucket
+ * being kept sorted by vxlan_ftable_entry_insert() to stop early.
+ */
+static struct vxlan_ftable_entry *
+vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
+{
+ struct vxlan_ftable_entry *fe;
+ uint32_t hash;
+ int dir;
+
+ VXLAN_LOCK_ASSERT(sc);
+ hash = VXLAN_SC_FTABLE_HASH(sc, mac);
+
+ LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
+ dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
+ if (dir == 0)
+ return (fe);
+ /* Bucket is sorted descending; a larger MAC means no match. */
+ if (dir > 0)
+ break;
+ }
+
+ return (NULL);
+}
+
+/*
+ * Format one forwarding entry as a single text line into 'sb':
+ * type flag, flags byte, MAC, remote address, and expiration time.
+ * A line that overflows the fixed-size sbuf is truncated entirely.
+ */
+static void
+vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
+{
+ char buf[64];
+ const union vxlan_sockaddr *sa;
+ const void *addr;
+ int i, len, af, width;
+
+ sa = &fe->vxlfe_raddr;
+ af = sa->sa.sa_family;
+ /* Remember the line start so a partial line can be rolled back. */
+ len = sbuf_len(sb);
+
+ sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
+ fe->vxlfe_flags);
+
+ for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
+ sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
+ sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
+
+ if (af == AF_INET) {
+ addr = &sa->in4.sin_addr;
+ width = INET_ADDRSTRLEN - 1;
+ } else {
+ addr = &sa->in6.sin6_addr;
+ width = INET6_ADDRSTRLEN - 1;
+ }
+ inet_ntop(af, addr, buf, sizeof(buf));
+ sbuf_printf(sb, "%*s ", width, buf);
+
+ sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
+
+ sbuf_putc(sb, '\n');
+
+ /* Truncate a partial line. */
+ if (sbuf_error(sb) != 0)
+ sbuf_setpos(sb, len);
+}
+
+/*
+ * Allocate a vxlan socket wrapper for the given local address and
+ * initialize its lock, refcount, and VNI hash.  May sleep (M_WAITOK).
+ */
+static struct vxlan_socket *
+vxlan_socket_alloc(const union vxlan_sockaddr *sa)
+{
+ struct vxlan_socket *vso;
+ int i;
+
+ vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
+ rm_init(&vso->vxlso_lock, "vxlansorm");
+ refcount_init(&vso->vxlso_refcnt, 0);
+ for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
+ LIST_INIT(&vso->vxlso_vni_hash[i]);
+ vso->vxlso_laddr = *sa;
+
+ return (vso);
+}
+
+/*
+ * Free a vxlan socket wrapper once its last reference is gone,
+ * closing the underlying UDP socket.  Asserts that no multicast
+ * groups remain joined and no softcs remain attached.
+ */
+static void
+vxlan_socket_destroy(struct vxlan_socket *vso)
+{
+ struct socket *so;
+ struct vxlan_socket_mc_info *mc;
+ int i;
+
+ for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+ mc = &vso->vxlso_mc[i];
+ KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
+ ("%s: socket %p mc[%d] still has address",
+ __func__, vso, i));
+ }
+
+ for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
+ KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
+ ("%s: socket %p vni_hash[%d] not empty",
+ __func__, vso, i));
+ }
+
+ so = vso->vxlso_sock;
+ if (so != NULL) {
+ vso->vxlso_sock = NULL;
+ soclose(so);
+ }
+
+ rm_destroy(&vso->vxlso_lock);
+ free(vso, M_VXLAN);
+}
+
+/*
+ * Drop a reference on a vxlan socket.  The unlink from the global
+ * list happens under vxlan_list_mtx together with the final release
+ * so a concurrent lookup cannot revive a dying socket; the actual
+ * destruction runs after the mutex is dropped (soclose may sleep).
+ */
+static void
+vxlan_socket_release(struct vxlan_socket *vso)
+{
+ int destroy;
+
+ mtx_lock(&vxlan_list_mtx);
+ destroy = VXLAN_SO_RELEASE(vso);
+ if (destroy != 0)
+ LIST_REMOVE(vso, vxlso_entry);
+ mtx_unlock(&vxlan_list_mtx);
+
+ if (destroy != 0)
+ vxlan_socket_destroy(vso);
+}
+
+/*
+ * Find an existing vxlan socket bound to the given local address and
+ * return it with a new reference, or NULL if none matches.
+ */
+static struct vxlan_socket *
+vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
+{
+ struct vxlan_socket *vso;
+
+ mtx_lock(&vxlan_list_mtx);
+ LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
+ if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
+ VXLAN_SO_ACQUIRE(vso);
+ break;
+ }
+ }
+ mtx_unlock(&vxlan_list_mtx);
+
+ return (vso);
+}
+
+/*
+ * Add a vxlan socket to the global list, taking the list's reference.
+ */
+static void
+vxlan_socket_insert(struct vxlan_socket *vso)
+{
+
+ mtx_lock(&vxlan_list_mtx);
+ VXLAN_SO_ACQUIRE(vso);
+ LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
+ mtx_unlock(&vxlan_list_mtx);
+}
+
+/*
+ * Create the kernel UDP socket used for encapsulation, install the
+ * tunneling input function, and optionally enable SO_REUSEPORT so
+ * multiple vxlan sockets can bind the same local port.  Returns 0 or
+ * an errno; on failure the caller destroys the wrapper (which closes
+ * any socket already created).
+ */
+static int
+vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
+{
+ struct thread *td;
+ int error;
+
+ td = curthread;
+
+ error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
+ SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
+ if (error) {
+ if_printf(ifp, "cannot create socket: %d\n", error);
+ return (error);
+ }
+
+ error = udp_set_kernel_tunneling(vso->vxlso_sock,
+ vxlan_rcv_udp_packet, vso);
+ if (error) {
+ if_printf(ifp, "cannot set tunneling function: %d\n", error);
+ return (error);
+ }
+
+ if (vxlan_reuse_port != 0) {
+ struct sockopt sopt;
+ int val = 1;
+
+ /*
+ * SO_REUSEPORT is a socket-level option: with a level of
+ * IPPROTO_IP the request is routed to the IP ctloutput
+ * handler, which rejects it with ENOPROTOOPT, so SOL_SOCKET
+ * must be used here.
+ */
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_REUSEPORT;
+ sopt.sopt_val = &val;
+ sopt.sopt_valsize = sizeof(val);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+ if (error) {
+ if_printf(ifp,
+ "cannot set REUSEPORT socket opt: %d\n", error);
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Bind the vxlan socket to its configured local address/port.
+ * EADDRINUSE is expected when another vxlan interface already owns
+ * the address, so it is not logged; the caller falls back to sharing.
+ */
+static int
+vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
+{
+ union vxlan_sockaddr laddr;
+ struct thread *td;
+ int error;
+
+ td = curthread;
+ laddr = vso->vxlso_laddr;
+
+ error = sobind(vso->vxlso_sock, &laddr.sa, td);
+ if (error) {
+ if (error != EADDRINUSE)
+ if_printf(ifp, "cannot bind socket: %d\n", error);
+ return (error);
+ }
+
+ return (0);
+}
+
+/*
+ * Allocate, initialize, bind, and publish a new vxlan socket for the
+ * given local address.  For multicast destinations the socket binds
+ * the wildcard address so only the local port must match.  On success
+ * *vsop holds the socket (with the list's reference); on failure the
+ * partially constructed socket is destroyed.
+ */
+static int
+vxlan_socket_create(struct ifnet *ifp, int multicast,
+ const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
+{
+ union vxlan_sockaddr laddr;
+ struct vxlan_socket *vso;
+ int error;
+
+ laddr = *saddr;
+
+ /*
+ * If this socket will be multicast, then only the local port
+ * must be specified when binding.
+ */
+ if (multicast != 0) {
+ if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
+ laddr.in4.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+ else
+ laddr.in6.sin6_addr = in6addr_any;
+#endif
+ }
+
+ vso = vxlan_socket_alloc(&laddr);
+ if (vso == NULL)
+ return (ENOMEM);
+
+ error = vxlan_socket_init(vso, ifp);
+ if (error)
+ goto fail;
+
+ error = vxlan_socket_bind(vso, ifp);
+ if (error)
+ goto fail;
+
+ /*
+ * There is a small window between the bind completing and
+ * inserting the socket, so that a concurrent create may fail.
+ * Let's not worry about that for now.
+ */
+ vxlan_socket_insert(vso);
+ *vsop = vso;
+
+ return (0);
+
+fail:
+ vxlan_socket_destroy(vso);
+
+ return (error);
+}
+
+/*
+ * Notify every softc attached to this socket that 'ifp' is detaching;
+ * affected softcs are collected on 'list' for teardown by the caller.
+ */
+static void
+vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
+ struct vxlan_softc_head *list)
+{
+ struct rm_priotracker tracker;
+ struct vxlan_softc *sc;
+ int i;
+
+ VXLAN_SO_RLOCK(vso, &tracker);
+ for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
+ LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
+ vxlan_ifdetach(sc, ifp, list);
+ }
+ VXLAN_SO_RUNLOCK(vso, &tracker);
+}
+
+/*
+ * Look up a shareable multicast vxlan socket.  Multicast sockets are
+ * bound to the wildcard address (see vxlan_socket_create()), so the
+ * address portion is cleared before searching on the port.
+ */
+static struct vxlan_socket *
+vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
+{
+ struct vxlan_socket *vso;
+ union vxlan_sockaddr laddr;
+
+ laddr = *vxlsa;
+
+ if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
+ laddr.in4.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+ else
+ laddr.in6.sin6_addr = in6addr_any;
+#endif
+
+ vso = vxlan_socket_lookup(&laddr);
+
+ return (vso);
+}
+
+/*
+ * Return nonzero when an existing multicast membership slot matches
+ * the requested group/source/interface.  A wildcard local address or
+ * a zero interface index matches any slot value.
+ */
+static int
+vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+ int ifidx)
+{
+
+ if (!vxlan_sockaddr_in_any(local) &&
+ !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
+ return (0);
+ if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
+ return (0);
+ if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
+ return (0);
+
+ return (1);
+}
+
+/*
+ * Join the given multicast group on the socket.  For IPv4 the local
+ * address selects the interface; for IPv6 the interface index does.
+ * On success *source records the effective source address used, so
+ * later leaves can be symmetric.  Returns 0 or an errno.
+ */
+static int
+vxlan_socket_mc_join_group(struct vxlan_socket *vso,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+ int *ifidx, union vxlan_sockaddr *source)
+{
+ struct sockopt sopt;
+ int error;
+
+ *source = *local;
+
+ if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+ struct ip_mreq mreq;
+
+ mreq.imr_multiaddr = group->in4.sin_addr;
+ mreq.imr_interface = local->in4.sin_addr;
+
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_ADD_MEMBERSHIP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+ if (error)
+ return (error);
+
+ /*
+ * BMV: Ideally, there would be a formal way for us to get
+ * the local interface that was selected based on the
+ * imr_interface address. We could then update *ifidx so
+ * vxlan_sockaddr_mc_info_match() would return a match for
+ * later creates that explicitly set the multicast interface.
+ *
+ * If we really need to, we can of course look in the INP's
+ * membership list:
+ * sotoinpcb(vso->vxlso_sock)->inp_moptions->
+ * imo_membership[]->inm_ifp
+ * similarly to imo_match_group().
+ */
+ source->in4.sin_addr = local->in4.sin_addr;
+
+ } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+ struct ipv6_mreq mreq;
+
+ mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
+ mreq.ipv6mr_interface = *ifidx;
+
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IPV6;
+ sopt.sopt_name = IPV6_JOIN_GROUP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+ if (error)
+ return (error);
+
+ /*
+ * BMV: As with IPv4, we would really like to know what
+ * interface in6p_lookup_mcast_ifp() selected.
+ */
+ } else
+ error = EAFNOSUPPORT;
+
+ return (error);
+}
+
+/*
+ * Leave a previously joined multicast group, using the source address
+ * (IPv4) or interface index (IPv6) recorded at join time.
+ */
+static int
+vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
+ int ifidx)
+{
+ struct sockopt sopt;
+ int error;
+
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+
+ if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+ struct ip_mreq mreq;
+
+ mreq.imr_multiaddr = group->in4.sin_addr;
+ mreq.imr_interface = source->in4.sin_addr;
+
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_DROP_MEMBERSHIP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+
+ } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+ struct ipv6_mreq mreq;
+
+ mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
+ mreq.ipv6mr_interface = ifidx;
+
+ sopt.sopt_level = IPPROTO_IPV6;
+ sopt.sopt_name = IPV6_LEAVE_GROUP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+
+ } else
+ error = EAFNOSUPPORT;
+
+ return (error);
+}
+
+/*
+ * Record this interface's use of a multicast group on the socket,
+ * joining the group on first use.  On success *idx identifies the
+ * membership slot for a later vxlan_socket_mc_release_group_by_idx().
+ * Returns 0, ENOSPC when all slots are taken, or a join errno.
+ */
+static int
+vxlan_socket_mc_add_group(struct vxlan_socket *vso,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+ int ifidx, int *idx)
+{
+ union vxlan_sockaddr source;
+ struct vxlan_socket_mc_info *mc;
+ int i, empty, error;
+
+ /*
+ * Within a socket, the same multicast group may be used by multiple
+ * interfaces, each with a different network identifier. But a socket
+ * may only join a multicast group once, so keep track of the users
+ * here.
+ */
+
+ VXLAN_SO_WLOCK(vso);
+ for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+ mc = &vso->vxlso_mc[i];
+
+ if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+ empty++;
+ continue;
+ }
+
+ /* Already joined: just add a user to the existing slot. */
+ if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
+ goto out;
+ }
+ VXLAN_SO_WUNLOCK(vso);
+
+ if (empty == 0)
+ return (ENOSPC);
+
+ /* Join unlocked (sosetopt may sleep), then claim a slot. */
+ error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
+ if (error)
+ return (error);
+
+ VXLAN_SO_WLOCK(vso);
+ for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+ mc = &vso->vxlso_mc[i];
+
+ if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+ vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
+ vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
+ mc->vxlsomc_ifidx = ifidx;
+ goto out;
+ }
+ }
+ VXLAN_SO_WUNLOCK(vso);
+
+ /* Slots filled while unlocked: undo the join and give up. */
+ error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
+ MPASS(error == 0);
+
+ return (ENOSPC);
+
+out:
+ mc->vxlsomc_users++;
+ VXLAN_SO_WUNLOCK(vso);
+
+ *idx = i;
+
+ return (0);
+}
+
+/*
+ * Drop one user from the membership slot at 'idx'; when the last
+ * user goes away the slot is cleared and the group is left.  The
+ * leave happens unlocked using a copy of the slot's state.
+ */
+static void
+vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
+{
+ union vxlan_sockaddr group, source;
+ struct vxlan_socket_mc_info *mc;
+ int ifidx, leave;
+
+ KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
+ ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
+
+ leave = 0;
+ mc = &vso->vxlso_mc[idx];
+
+ VXLAN_SO_WLOCK(vso);
+ mc->vxlsomc_users--;
+ if (mc->vxlsomc_users == 0) {
+ group = mc->vxlsomc_gaddr;
+ source = mc->vxlsomc_saddr;
+ ifidx = mc->vxlsomc_ifidx;
+ bzero(mc, sizeof(*mc));
+ leave = 1;
+ }
+ VXLAN_SO_WUNLOCK(vso);
+
+ if (leave != 0) {
+ /*
+ * Our socket's membership in this group may have already
+ * been removed if we joined through an interface that's
+ * been detached.
+ */
+ vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
+ }
+}
+
+/*
+ * Find the softc registered on this socket for the given VNI and
+ * return it with a new reference; NULL if no match (LIST_FOREACH
+ * leaves the iterator NULL when the bucket is exhausted).
+ */
+static struct vxlan_softc *
+vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
+{
+ struct vxlan_softc *sc;
+ uint32_t hash;
+
+ VXLAN_SO_LOCK_ASSERT(vso);
+ hash = VXLAN_SO_VNI_HASH(vni);
+
+ LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
+ if (sc->vxl_vni == vni) {
+ VXLAN_ACQUIRE(sc);
+ break;
+ }
+ }
+
+ return (sc);
+}
+
+/*
+ * Unlocked wrapper: look up the softc for a VNI under the socket's
+ * read lock.  Returns a referenced softc or NULL.
+ */
+static struct vxlan_softc *
+vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
+{
+ struct rm_priotracker tracker;
+ struct vxlan_softc *sc;
+
+ VXLAN_SO_RLOCK(vso, &tracker);
+ sc = vxlan_socket_lookup_softc_locked(vso, vni);
+ VXLAN_SO_RUNLOCK(vso, &tracker);
+
+ return (sc);
+}
+
+/*
+ * Register a softc in the socket's VNI hash, taking a reference for
+ * the hash.  Fails with EEXIST if the VNI is already claimed on this
+ * socket (the temporary lookup reference is dropped in that case).
+ */
+static int
+vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
+{
+ struct vxlan_softc *tsc;
+ uint32_t vni, hash;
+
+ vni = sc->vxl_vni;
+ hash = VXLAN_SO_VNI_HASH(vni);
+
+ VXLAN_SO_WLOCK(vso);
+ tsc = vxlan_socket_lookup_softc_locked(vso, vni);
+ if (tsc != NULL) {
+ VXLAN_SO_WUNLOCK(vso);
+ vxlan_release(tsc);
+ return (EEXIST);
+ }
+
+ VXLAN_ACQUIRE(sc);
+ LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
+ VXLAN_SO_WUNLOCK(vso);
+
+ return (0);
+}
+
+/*
+ * Unregister a softc from the socket's VNI hash and drop the hash's
+ * reference on it.
+ */
+static void
+vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
+{
+
+ VXLAN_SO_WLOCK(vso);
+ LIST_REMOVE(sc, vxl_entry);
+ VXLAN_SO_WUNLOCK(vso);
+
+ vxlan_release(sc);
+}
+
+/*
+ * Return a referenced pointer to the interface used for multicast
+ * output (per-family ip_moptions/ip6_moptions), or NULL when no
+ * multicast options or interface are configured.
+ */
+static struct ifnet *
+vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
+{
+ struct ifnet *ifp;
+
+ VXLAN_LOCK_ASSERT(sc);
+
+ if (ipv4 && sc->vxl_im4o != NULL)
+ ifp = sc->vxl_im4o->imo_multicast_ifp;
+ else if (!ipv4 && sc->vxl_im6o != NULL)
+ ifp = sc->vxl_im6o->im6o_multicast_ifp;
+ else
+ ifp = NULL;
+
+ if (ifp != NULL)
+ if_ref(ifp);
+
+ return (ifp);
+}
+
+/*
+ * Release all multicast state: the held interface reference and the
+ * per-family multicast options structures.  Idempotent.
+ */
+static void
+vxlan_free_multicast(struct vxlan_softc *sc)
+{
+
+ if (sc->vxl_mc_ifp != NULL) {
+ if_rele(sc->vxl_mc_ifp);
+ sc->vxl_mc_ifp = NULL;
+ sc->vxl_mc_ifindex = 0;
+ }
+
+ if (sc->vxl_im4o != NULL) {
+ free(sc->vxl_im4o, M_VXLAN);
+ sc->vxl_im4o = NULL;
+ }
+
+ if (sc->vxl_im6o != NULL) {
+ free(sc->vxl_im6o, M_VXLAN);
+ sc->vxl_im6o = NULL;
+ }
+}
+
+/*
+ * Resolve the user-configured multicast interface name to an ifnet,
+ * verify it is multicast capable, and hold a reference to it in the
+ * softc.  Returns 0, ENOENT, or ENOTSUP.
+ */
+static int
+vxlan_setup_multicast_interface(struct vxlan_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = ifunit_ref(sc->vxl_mc_ifname);
+ if (ifp == NULL) {
+ /* Fixed grammar: "interface ... does not exist". */
+ if_printf(sc->vxl_ifp, "multicast interface %s does "
+ "not exist\n", sc->vxl_mc_ifname);
+ return (ENOENT);
+ }
+
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ if_printf(sc->vxl_ifp, "interface %s does not support "
+ "multicast\n", sc->vxl_mc_ifname);
+ if_rele(ifp);
+ return (ENOTSUP);
+ }
+
+ sc->vxl_mc_ifp = ifp;
+ sc->vxl_mc_ifindex = ifp->if_index;
+
+ return (0);
+}
+
+/*
+ * Prepare multicast transmit state for the configured group address:
+ * resolve the optional multicast interface and build a per-family
+ * multicast options structure for the output path.
+ */
+static int
+vxlan_setup_multicast(struct vxlan_softc *sc)
+{
+ const union vxlan_sockaddr *group;
+ int error;
+
+ group = &sc->vxl_dst_addr;
+ error = 0;
+
+ if (sc->vxl_mc_ifname[0] != '\0') {
+ error = vxlan_setup_multicast_interface(sc);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * Initialize a multicast options structure that is sufficiently
+ * populated for use in the respective IP output routine. This
+ * structure is typically stored in the socket, but our sockets
+ * may be shared among multiple interfaces.
+ */
+ if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+ sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
+ M_ZERO | M_WAITOK);
+ sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
+ sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
+ sc->vxl_im4o->imo_multicast_vif = -1;
+ } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+ sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
+ M_ZERO | M_WAITOK);
+ sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
+ sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
+ }
+
+ return (error);
+}
+
+/*
+ * Bind this interface to a vxlan socket: create one for the local
+ * address or share an existing one, join the multicast group when
+ * the destination is multicast, and register the softc's VNI on the
+ * socket.  On error all partially acquired state is rolled back.
+ */
+static int
+vxlan_setup_socket(struct vxlan_softc *sc)
+{
+ struct vxlan_socket *vso;
+ struct ifnet *ifp;
+ union vxlan_sockaddr *saddr, *daddr;
+ int multicast, error;
+
+ vso = NULL;
+ ifp = sc->vxl_ifp;
+ saddr = &sc->vxl_src_addr;
+ daddr = &sc->vxl_dst_addr;
+
+ multicast = vxlan_sockaddr_in_multicast(daddr);
+ MPASS(multicast != -1);
+ sc->vxl_vso_mc_index = -1;
+
+ /*
+ * Try to create the socket. If that fails, attempt to use an
+ * existing socket.
+ */
+ error = vxlan_socket_create(ifp, multicast, saddr, &vso);
+ if (error) {
+ if (multicast != 0)
+ vso = vxlan_socket_mc_lookup(saddr);
+ else
+ vso = vxlan_socket_lookup(saddr);
+
+ if (vso == NULL) {
+ if_printf(ifp, "cannot create socket (error: %d), "
+ "and no existing socket found\n", error);
+ goto out;
+ }
+ }
+
+ if (multicast != 0) {
+ error = vxlan_setup_multicast(sc);
+ if (error)
+ goto out;
+
+ error = vxlan_socket_mc_add_group(vso, daddr, saddr,
+ sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
+ if (error)
+ goto out;
+ }
+
+ sc->vxl_sock = vso;
+ error = vxlan_socket_insert_softc(vso, sc);
+ if (error) {
+ sc->vxl_sock = NULL;
+ if_printf(ifp, "network identifier %d already exists in "
+ "this socket\n", sc->vxl_vni);
+ goto out;
+ }
+
+ return (0);
+
+out:
+ /* Roll back: group membership, multicast state, socket reference. */
+ if (vso != NULL) {
+ if (sc->vxl_vso_mc_index != -1) {
+ vxlan_socket_mc_release_group_by_idx(vso,
+ sc->vxl_vso_mc_index);
+ sc->vxl_vso_mc_index = -1;
+ }
+ if (multicast != 0)
+ vxlan_free_multicast(sc);
+ vxlan_socket_release(vso);
+ }
+
+ return (error);
+}
+
+/*
+ * Set the interface header length to account for the full
+ * encapsulation overhead: Ethernet + (IPv4 or IPv6) + UDP + VXLAN.
+ */
+static void
+vxlan_setup_interface(struct vxlan_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = sc->vxl_ifp;
+ ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
+
+ if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
+ ifp->if_hdrlen += sizeof(struct ip);
+ else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
+ ifp->if_hdrlen += sizeof(struct ip6_hdr);
+}
+
+/*
+ * Validate the configuration before bringing the interface up: VNI,
+ * address families, destination and port presence, and the multicast
+ * interface constraint.  Logs the reason and returns EINVAL on
+ * failure, 0 when the configuration is usable.
+ */
+static int
+vxlan_valid_init_config(struct vxlan_softc *sc)
+{
+ const char *reason;
+
+ if (vxlan_check_vni(sc->vxl_vni) != 0) {
+ reason = "invalid virtual network identifier specified";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
+ reason = "source address type is not supported";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
+ reason = "destination address type is not supported";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
+ reason = "no valid destination address specified";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
+ sc->vxl_mc_ifname[0] != '\0') {
+ reason = "can only specify interface with a group address";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
+ /* Both endpoints must use the same address family. */
+ if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
+ VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
+ reason = "source and destination address must both "
+ "be either IPv4 or IPv6";
+ goto fail;
+ }
+ }
+
+ /* sin_port aliases sin6_port, so this check covers both families. */
+ if (sc->vxl_src_addr.in4.sin_port == 0) {
+ reason = "local port not specified";
+ goto fail;
+ }
+
+ if (sc->vxl_dst_addr.in4.sin_port == 0) {
+ reason = "remote port not specified";
+ goto fail;
+ }
+
+ return (0);
+
+fail:
+ if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
+ return (EINVAL);
+}
+
+/*
+ * Sleep until a concurrent vxlan_init() has finished.  The timeout
+ * guards against a missed wakeup (see vxlan_release()).
+ */
+static void
+vxlan_init_wait(struct vxlan_softc *sc)
+{
+
+	VXLAN_LOCK_WASSERT(sc);
+	while (sc->vxl_flags & VXLAN_FLAG_INIT)
+		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
+}
+
+/*
+ * Clear the INIT flag and wake any threads blocked in vxlan_init_wait().
+ */
+static void
+vxlan_init_complete(struct vxlan_softc *sc)
+{
+
+	VXLAN_WLOCK(sc);
+	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
+	wakeup(sc);
+	VXLAN_WUNLOCK(sc);
+}
+
+/*
+ * if_init method: validate the configuration, bind the UDP socket, seed
+ * the default (all-zero MAC) forwarding entry, and mark the interface
+ * running.  VXLAN_FLAG_INIT is held across the unlocked middle section
+ * so teardown can serialize against us.
+ */
+static void
+vxlan_init(void *xsc)
+{
+	static const uint8_t empty_mac[ETHER_ADDR_LEN];
+	struct vxlan_softc *sc;
+	struct ifnet *ifp;
+
+	sc = xsc;
+	ifp = sc->vxl_ifp;
+
+	VXLAN_WLOCK(sc);
+	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+		VXLAN_WUNLOCK(sc);
+		return;
+	}
+	sc->vxl_flags |= VXLAN_FLAG_INIT;
+	VXLAN_WUNLOCK(sc);
+
+	if (vxlan_valid_init_config(sc) != 0)
+		goto out;
+
+	vxlan_setup_interface(sc);
+
+	if (vxlan_setup_socket(sc) != 0)
+		goto out;
+
+	/* Initialize the default forwarding entry. */
+	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
+	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
+
+	VXLAN_WLOCK(sc);
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
+	    vxlan_timer, sc);
+	VXLAN_WUNLOCK(sc);
+
+out:
+	/* Always clear VXLAN_FLAG_INIT and wake waiters, even on failure. */
+	vxlan_init_complete(sc);
+}
+
+/*
+ * Drop a softc reference, waking the teardown thread when the last
+ * reference goes away.
+ */
+static void
+vxlan_release(struct vxlan_softc *sc)
+{
+
+	/*
+	 * The softc may be destroyed as soon as we release our reference,
+	 * so we cannot serialize the wakeup with the softc lock. We use a
+	 * timeout in our sleeps so a missed wakeup is unfortunate but not
+	 * fatal.
+	 */
+	if (VXLAN_RELEASE(sc) != 0)
+		wakeup(sc);
+}
+
+/*
+ * Sleep until a concurrent teardown has finished.  The timeout guards
+ * against a missed wakeup (see vxlan_release()).
+ */
+static void
+vxlan_teardown_wait(struct vxlan_softc *sc)
+{
+
+	VXLAN_LOCK_WASSERT(sc);
+	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
+		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
+}
+
+/*
+ * Clear the TEARDOWN flag and wake any threads blocked in
+ * vxlan_teardown_wait().
+ */
+static void
+vxlan_teardown_complete(struct vxlan_softc *sc)
+{
+
+	VXLAN_WLOCK(sc);
+	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
+	wakeup(sc);
+	VXLAN_WUNLOCK(sc);
+}
+
+/*
+ * Tear down a running interface: stop the prune callout, detach from the
+ * shared UDP socket, release multicast state, and wait for in-flight
+ * transmits/receives to drain.  Called with the softc write lock held
+ * and VXLAN_FLAG_TEARDOWN set; the lock is dropped and retaken around
+ * the socket operations, which may sleep.  Returns unlocked.
+ */
+static void
+vxlan_teardown_locked(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+	struct vxlan_socket *vso;
+
+	ifp = sc->vxl_ifp;
+
+	VXLAN_LOCK_WASSERT(sc);
+	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
+
+	ifp->if_flags &= ~IFF_UP;
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+	callout_stop(&sc->vxl_callout);
+	vso = sc->vxl_sock;
+	sc->vxl_sock = NULL;
+
+	VXLAN_WUNLOCK(sc);
+
+	if (vso != NULL) {
+		vxlan_socket_remove_softc(vso, sc);
+
+		if (sc->vxl_vso_mc_index != -1) {
+			vxlan_socket_mc_release_group_by_idx(vso,
+			    sc->vxl_vso_mc_index);
+			sc->vxl_vso_mc_index = -1;
+		}
+	}
+
+	/* Wait for any outstanding softc references to drain. */
+	VXLAN_WLOCK(sc);
+	while (sc->vxl_refcnt != 0)
+		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
+	VXLAN_WUNLOCK(sc);
+
+	callout_drain(&sc->vxl_callout);
+
+	vxlan_free_multicast(sc);
+	if (vso != NULL)
+		vxlan_socket_release(vso);
+
+	vxlan_teardown_complete(sc);
+}
+
+/*
+ * Tear down the interface, waiting instead if another thread is already
+ * doing so.  vxlan_teardown_locked() returns with the lock released.
+ */
+static void
+vxlan_teardown(struct vxlan_softc *sc)
+{
+
+	VXLAN_WLOCK(sc);
+	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
+		vxlan_teardown_wait(sc);
+		VXLAN_WUNLOCK(sc);
+		return;
+	}
+
+	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
+	vxlan_teardown_locked(sc);
+}
+
+/*
+ * If this softc uses the departing interface for multicast, claim the
+ * TEARDOWN flag and queue it on 'list' so vxlan_ifdetach_event() can
+ * finish the teardown outside the socket-list lock.
+ */
+static void
+vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
+    struct vxlan_softc_head *list)
+{
+
+	VXLAN_WLOCK(sc);
+
+	if (sc->vxl_mc_ifp != ifp)
+		goto out;
+	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
+		goto out;
+
+	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
+	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
+
+out:
+	VXLAN_WUNLOCK(sc);
+}
+
+/*
+ * Periodic callout: expire stale forwarding-table entries and
+ * reschedule.  Runs with the softc lock held (callout_init_rw).
+ */
+static void
+vxlan_timer(void *xsc)
+{
+	struct vxlan_softc *sc;
+
+	sc = xsc;
+	VXLAN_LOCK_WASSERT(sc);
+
+	vxlan_ftable_expire(sc);
+	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
+}
+
+/*
+ * SIOCSIFFLAGS handler: bring the interface up or down to match the
+ * administrative IFF_UP flag.
+ */
+static int
+vxlan_ioctl_ifflags(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+
+	ifp = sc->vxl_ifp;
+
+	if (ifp->if_flags & IFF_UP) {
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+			vxlan_init(sc);
+	} else {
+		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+			vxlan_teardown(sc);
+	}
+
+	return (0);
+}
+
+/*
+ * Control op: copy a consistent snapshot of the current configuration
+ * into the caller-supplied ifvxlancfg under the read lock.
+ */
+static int
+vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
+{
+	struct rm_priotracker tracker;
+	struct ifvxlancfg *cfg;
+
+	cfg = arg;
+	bzero(cfg, sizeof(*cfg));
+
+	VXLAN_RLOCK(sc, &tracker);
+	cfg->vxlc_vni = sc->vxl_vni;
+	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
+	    sizeof(union vxlan_sockaddr));
+	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
+	    sizeof(union vxlan_sockaddr));
+	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
+	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
+	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
+	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
+	cfg->vxlc_port_min = sc->vxl_min_port;
+	cfg->vxlc_port_max = sc->vxl_max_port;
+	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
+	cfg->vxlc_ttl = sc->vxl_ttl;
+	VXLAN_RUNLOCK(sc, &tracker);
+
+	return (0);
+}
+
+/*
+ * Control op: set the virtual network identifier.  Only allowed while
+ * the configuration is mutable (interface down, not mid init/teardown).
+ */
+static int
+vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_vni = cmd->vxlcmd_vni;
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the local (source) tunnel address.  Multicast source
+ * addresses are rejected.
+ */
+static int
+vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	union vxlan_sockaddr *vxlsa;
+	int error;
+
+	cmd = arg;
+	vxlsa = &cmd->vxlcmd_sa;
+
+	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
+		return (EINVAL);
+	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the remote (destination) tunnel address.  Unlike the
+ * local address, a multicast group address is permitted here.
+ */
+static int
+vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	union vxlan_sockaddr *vxlsa;
+	int error;
+
+	cmd = arg;
+	vxlsa = &cmd->vxlcmd_sa;
+
+	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the local UDP port.  The port lives in the in4 member
+ * of the union; sin_port and sin6_port overlap, so this covers IPv6 too.
+ */
+static int
+vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	if (cmd->vxlcmd_port == 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the remote UDP port (see vxlan_ctrl_set_local_port
+ * regarding the overlapping in4/in6 port field).
+ */
+static int
+vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	if (cmd->vxlcmd_port == 0)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the inclusive range used when hashing a flow to a
+ * source port (see vxlan_pick_source_port()).
+ */
+static int
+vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	uint16_t min, max;
+	int error;
+
+	cmd = arg;
+	min = cmd->vxlcmd_port_min;
+	max = cmd->vxlcmd_port_max;
+
+	if (max < min)
+		return (EINVAL);
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		sc->vxl_min_port = min;
+		sc->vxl_max_port = max;
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the forwarding-table entry timeout.  May be changed
+ * while the interface is running.
+ */
+static int
+vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
+		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
+		error = 0;
+	} else
+		error = EINVAL;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the maximum number of forwarding-table entries.  May
+ * be changed while the interface is running.
+ */
+static int
+vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
+		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
+		error = 0;
+	} else
+		error = EINVAL;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the name of the interface used to join the multicast
+ * group.  Only allowed while the configuration is mutable; the name is
+ * resolved when the interface is brought up.
+ */
+static int
+vxlan_ctrl_set_multicast_if(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_can_change_config(sc)) {
+		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
+		error = 0;
+	} else
+		error = EBUSY;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: set the outer-header TTL/hop limit, propagating it into
+ * any existing IPv4/IPv6 multicast options.  May be changed while the
+ * interface is running.
+ */
+static int
+vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
+		sc->vxl_ttl = cmd->vxlcmd_ttl;
+		if (sc->vxl_im4o != NULL)
+			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
+		if (sc->vxl_im6o != NULL)
+			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
+		error = 0;
+	} else
+		error = EINVAL;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: enable or disable source-MAC learning on received frames.
+ */
+static int
+vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	int enable;
+
+	cmd = arg;
+	enable = (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN) != 0;
+
+	VXLAN_WLOCK(sc);
+	if (enable != 0)
+		sc->vxl_flags |= VXLAN_FLAG_LEARN;
+	else
+		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
+	VXLAN_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Control op: install a static forwarding-table entry mapping a MAC
+ * address to a unicast tunnel endpoint.  The address family must match
+ * the configured destination; a zero port inherits the default.
+ */
+static int
+vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
+{
+	union vxlan_sockaddr vxlsa;
+	struct ifvxlancmd *cmd;
+	struct vxlan_ftable_entry *fe;
+	int error;
+
+	cmd = arg;
+	vxlsa = cmd->vxlcmd_sa;
+
+	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
+		return (EINVAL);
+	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
+		return (EINVAL);
+	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
+		return (EINVAL);
+	/* BMV: We could support both IPv4 and IPv6 later. */
+	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
+		return (EAFNOSUPPORT);
+
+	fe = vxlan_ftable_entry_alloc();
+	if (fe == NULL)
+		return (ENOMEM);
+
+	if (vxlsa.in4.sin_port == 0)
+		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
+
+	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
+	    VXLAN_FE_FLAG_STATIC);
+
+	VXLAN_WLOCK(sc);
+	error = vxlan_ftable_entry_insert(sc, fe);
+	VXLAN_WUNLOCK(sc);
+
+	if (error)
+		vxlan_ftable_entry_free(fe);
+
+	return (error);
+}
+
+/*
+ * Control op: remove the forwarding-table entry for the given MAC
+ * address, if present.
+ */
+static int
+vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd;
+	struct vxlan_ftable_entry *fe;
+	int error;
+
+	cmd = arg;
+
+	VXLAN_WLOCK(sc);
+	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
+	if (fe != NULL) {
+		vxlan_ftable_entry_destroy(sc, fe);
+		error = 0;
+	} else
+		error = ENOENT;
+	VXLAN_WUNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Control op: flush the forwarding table.  Static entries are only
+ * removed when the FLUSH_ALL flag is given.
+ */
+static int
+vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
+{
+	struct ifvxlancmd *cmd = arg;
+	int flush_all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
+
+	VXLAN_WLOCK(sc);
+	vxlan_ftable_flush(sc, flush_all);
+	VXLAN_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Dispatch a SIOC[GS]DRVSPEC request through the control table: validate
+ * the command index, direction, privilege, and argument size, then copy
+ * the argument in, invoke the handler, and copy the result back out.
+ */
+static int
+vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
+{
+	const struct vxlan_control *vc;
+	union {
+		struct ifvxlancfg	cfg;
+		struct ifvxlancmd	cmd;
+	} args;
+	int out, error;
+
+	if (ifd->ifd_cmd >= vxlan_control_table_size)
+		return (EINVAL);
+
+	bzero(&args, sizeof(args));
+	vc = &vxlan_control_table[ifd->ifd_cmd];
+	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
+
+	/* The request direction must match the command's direction. */
+	if ((get != 0 && out == 0) || (get == 0 && out != 0))
+		return (EINVAL);
+
+	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
+		error = priv_check(curthread, PRIV_NET_VXLAN);
+		if (error)
+			return (error);
+	}
+
+	if (ifd->ifd_len != vc->vxlc_argsize ||
+	    ifd->ifd_len > sizeof(args))
+		return (EINVAL);
+
+	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
+		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
+		if (error)
+			return (error);
+	}
+
+	error = vc->vxlc_func(sc, &args);
+	if (error)
+		return (error);
+
+	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
+		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * if_ioctl method: handle driver-specific and flag ioctls locally,
+ * deferring everything else to ether_ioctl().
+ */
+static int
+vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct vxlan_softc *sc;
+	struct ifreq *ifr;
+	struct ifdrv *ifd;
+	int error;
+
+	sc = ifp->if_softc;
+	ifr = (struct ifreq *) data;
+	ifd = (struct ifdrv *) data;
+
+	switch (cmd) {
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		/* Nothing to program; accept silently. */
+		error = 0;
+		break;
+
+	case SIOCGDRVSPEC:
+	case SIOCSDRVSPEC:
+		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
+		break;
+
+	case SIOCSIFFLAGS:
+		error = vxlan_ioctl_ifflags(sc);
+		break;
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+
+	return (error);
+}
+
+#if defined(INET) || defined(INET6)
+/*
+ * Choose a UDP source port within [vxl_min_port, vxl_max_port] by
+ * hashing the flow, so distinct inner flows spread across ECMP paths.
+ * Prefer the mbuf's flowid when the NIC computed one.
+ */
+static uint16_t
+vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
+{
+	int range;
+	uint32_t hash;
+
+	range = sc->vxl_max_port - sc->vxl_min_port + 1;
+
+	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE &&
+	    M_HASHTYPE_GET(m) != M_HASHTYPE_OPAQUE)
+		hash = m->m_pkthdr.flowid;
+	else
+		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
+		    sc->vxl_port_hash_key);
+
+	return (sc->vxl_min_port + (hash % range));
+}
+
+/*
+ * Fill in the UDP and VXLAN headers at 'ipoff' bytes into the mbuf.
+ * The UDP checksum is left zero here; vxlan_encap6() computes it when
+ * required.  Ports are already in network byte order.
+ */
+static void
+vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
+    uint16_t srcport, uint16_t dstport)
+{
+	struct vxlanudphdr *hdr;
+	struct udphdr *udph;
+	struct vxlan_header *vxh;
+	int len;
+
+	len = m->m_pkthdr.len - ipoff;
+	MPASS(len >= sizeof(struct vxlanudphdr));
+	hdr = mtodo(m, ipoff);
+
+	udph = &hdr->vxlh_udp;
+	udph->uh_sport = srcport;
+	udph->uh_dport = dstport;
+	udph->uh_ulen = htons(len);
+	udph->uh_sum = 0;
+
+	vxh = &hdr->vxlh_hdr;
+	vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
+	vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
+}
+#endif
+
+/*
+ * Encapsulate the Ethernet frame in IPv4+UDP+VXLAN headers and send it
+ * via ip_output().  Consumes the mbuf in all cases.
+ */
+static int
+vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
+    struct mbuf *m)
+{
+#ifdef INET
+	struct ifnet *ifp;
+	struct ip *ip;
+	struct in_addr srcaddr, dstaddr;
+	uint16_t srcport, dstport;
+	int len, mcast, error;
+
+	ifp = sc->vxl_ifp;
+	srcaddr = sc->vxl_src_addr.in4.sin_addr;
+	srcport = vxlan_pick_source_port(sc, m);
+	dstaddr = fvxlsa->in4.sin_addr;
+	dstport = fvxlsa->in4.sin_port;
+
+	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
+	    M_NOWAIT);
+	if (m == NULL) {
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENOBUFS);
+	}
+
+	len = m->m_pkthdr.len;
+
+	ip = mtod(m, struct ip *);
+	ip->ip_tos = 0;
+	ip->ip_len = htons(len);
+	ip->ip_off = 0;
+	ip->ip_ttl = sc->vxl_ttl;
+	ip->ip_p = IPPROTO_UDP;
+	ip->ip_sum = 0;
+	ip->ip_src = srcaddr;
+	ip->ip_dst = dstaddr;
+
+	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
+
+	/* The M_MCAST/M_BCAST flags describe the inner frame, not ours. */
+	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+	m->m_flags &= ~(M_MCAST | M_BCAST);
+
+	error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
+	if (error == 0) {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+		if (mcast != 0)
+			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+	} else
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+	return (error);
+#else
+	m_freem(m);
+	return (ENOTSUP);
+#endif
+}
+
+/*
+ * Encapsulate the Ethernet frame in IPv6+UDP+VXLAN headers and send it
+ * via ip6_output().  Consumes the mbuf in all cases.  Unlike IPv4, the
+ * UDP checksum is mandatory for IPv6, so it is requested here.
+ */
+static int
+vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
+    struct mbuf *m)
+{
+#ifdef INET6
+	struct ifnet *ifp;
+	struct ip6_hdr *ip6;
+	const struct in6_addr *srcaddr, *dstaddr;
+	uint16_t srcport, dstport;
+	int len, mcast, error;
+
+	ifp = sc->vxl_ifp;
+	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
+	srcport = vxlan_pick_source_port(sc, m);
+	dstaddr = &fvxlsa->in6.sin6_addr;
+	dstport = fvxlsa->in6.sin6_port;
+
+	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
+	    M_NOWAIT);
+	if (m == NULL) {
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENOBUFS);
+	}
+
+	len = m->m_pkthdr.len;
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
+	ip6->ip6_vfc = IPV6_VERSION;
+	ip6->ip6_plen = 0;
+	ip6->ip6_nxt = IPPROTO_UDP;
+	ip6->ip6_hlim = sc->vxl_ttl;
+	ip6->ip6_src = *srcaddr;
+	ip6->ip6_dst = *dstaddr;
+
+	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
+
+	/*
+	 * XXX BMV We need support for RFC6935 before we can send and
+	 * receive IPv6 UDP packets with a zero checksum.
+	 */
+	{
+		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
+		hdr->uh_sum = in6_cksum_pseudo(ip6,
+		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
+		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+	}
+
+	/* The M_MCAST/M_BCAST flags describe the inner frame, not ours. */
+	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+	m->m_flags &= ~(M_MCAST | M_BCAST);
+
+	error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
+	if (error == 0) {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+		if (mcast != 0)
+			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+	} else
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+	return (error);
+#else
+	m_freem(m);
+	return (ENOTSUP);
+#endif
+}
+
+/*
+ * if_transmit method: look up the tunnel endpoint for the destination
+ * MAC (falling back to the default entry for unknown/broadcast frames)
+ * and encapsulate.  A softc reference is taken before the read lock is
+ * dropped so teardown cannot free the softc beneath the encap path.
+ */
+static int
+vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	struct rm_priotracker tracker;
+	union vxlan_sockaddr vxlsa;
+	struct vxlan_softc *sc;
+	struct vxlan_ftable_entry *fe;
+	struct ifnet *mcifp;
+	struct ether_header *eh;
+	int ipv4, error;
+
+	sc = ifp->if_softc;
+	eh = mtod(m, struct ether_header *);
+	fe = NULL;
+	mcifp = NULL;
+
+	ETHER_BPF_MTAP(ifp, m);
+
+	VXLAN_RLOCK(sc, &tracker);
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		VXLAN_RUNLOCK(sc, &tracker);
+		m_freem(m);
+		return (ENETDOWN);
+	}
+
+	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
+	if (fe == NULL)
+		fe = &sc->vxl_default_fe;
+	/* Copy the address out before the lock (and thus 'fe') can go away. */
+	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
+
+	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
+	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
+		mcifp = vxlan_multicast_if_ref(sc, ipv4);
+
+	VXLAN_ACQUIRE(sc);
+	VXLAN_RUNLOCK(sc, &tracker);
+
+	if (ipv4 != 0)
+		error = vxlan_encap4(sc, &vxlsa, m);
+	else
+		error = vxlan_encap6(sc, &vxlsa, m);
+
+	vxlan_release(sc);
+	if (mcifp != NULL)
+		if_rele(mcifp);
+
+	return (error);
+}
+
+/*
+ * if_qflush method: vxlan_transmit() never enqueues packets, so there
+ * is nothing to flush.
+ */
+static void
+vxlan_qflush(struct ifnet *ifp __unused)
+{
+}
+
+/*
+ * UDP tunneling callback: validate and strip the VXLAN header from a
+ * received datagram, then hand the inner frame to vxlan_input().  The
+ * mbuf is consumed (freed here unless vxlan_input() took it).
+ */
+static void
+vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
+    const struct sockaddr *srcsa, void *xvso)
+{
+	struct vxlan_socket *vso;
+	struct vxlan_header *vxh, vxlanhdr;
+	uint32_t vni;
+	int error;
+
+	M_ASSERTPKTHDR(m);
+	vso = xvso;
+	offset += sizeof(struct udphdr);
+
+	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
+		goto out;
+
+	/* Copy the header out if it is not contiguous in the first mbuf. */
+	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
+		m_copydata(m, offset, sizeof(struct vxlan_header),
+		    (caddr_t) &vxlanhdr);
+		vxh = &vxlanhdr;
+	} else
+		vxh = mtodo(m, offset);
+
+	/*
+	 * Drop if there is a reserved bit set in either the flags or VNI
+	 * fields of the header. This goes against the specification, but
+	 * a bit set may indicate an unsupported new feature. This matches
+	 * the behavior of the Linux implementation.
+	 */
+	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
+	    vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK))
+		goto out;
+
+	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
+	/* Adjust to the start of the inner Ethernet frame. */
+	m_adj(m, offset + sizeof(struct vxlan_header));
+
+	error = vxlan_input(vso, vni, &m, srcsa);
+	MPASS(error != 0 || m == NULL);
+
+out:
+	if (m != NULL)
+		m_freem(m);
+}
+
+/*
+ * Deliver a decapsulated Ethernet frame to the vxlan interface bound to
+ * 'vni' on this socket.  On success *m0 is consumed and cleared; on
+ * error the caller still owns the mbuf.  The lookup returns the softc
+ * with a reference held, released before returning.
+ */
+static int
+vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
+    const struct sockaddr *sa)
+{
+	struct vxlan_softc *sc;
+	struct ifnet *ifp;
+	struct mbuf *m;
+	struct ether_header *eh;
+	int error;
+
+	sc = vxlan_socket_lookup_softc(vso, vni);
+	if (sc == NULL)
+		return (ENOENT);
+
+	ifp = sc->vxl_ifp;
+	m = *m0;
+	eh = mtod(m, struct ether_header *);
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		error = ENETDOWN;
+		goto out;
+	} else if (ifp == m->m_pkthdr.rcvif) {
+		/* XXX Does not catch more complex loops. */
+		error = EDEADLK;
+		goto out;
+	}
+
+	/* Learn the mapping from inner source MAC to outer source IP. */
+	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
+		vxlan_ftable_update(sc, sa, eh->ether_shost);
+
+	m_clrprotoflags(m);
+	m->m_pkthdr.rcvif = ifp;
+	M_SETFIB(m, ifp->if_fib);
+
+	error = netisr_queue_src(NETISR_ETHER, 0, m);
+	*m0 = NULL;
+
+out:
+	vxlan_release(sc);
+	return (error);
+}
+
+/*
+ * Apply the compile-time/tunable defaults to a freshly allocated softc:
+ * learning enabled, invalid VNI (must be configured), default TTL, the
+ * IANA or legacy Linux UDP port, and the system ephemeral port range.
+ */
+static void
+vxlan_set_default_config(struct vxlan_softc *sc)
+{
+
+	sc->vxl_flags |= VXLAN_FLAG_LEARN;
+
+	sc->vxl_vni = VXLAN_VNI_MAX;
+	sc->vxl_ttl = IPDEFTTL;
+
+	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
+		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
+		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
+	} else {
+		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
+		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
+	}
+
+	sc->vxl_min_port = V_ipport_firstauto;
+	sc->vxl_max_port = V_ipport_lastauto;
+
+	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
+	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
+}
+
+/*
+ * Apply the parameters passed at clone-create time on top of the
+ * defaults.  Individual out-of-range values are silently ignored;
+ * only an unsupported address family is an error.
+ */
+static int
+vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
+{
+
+#ifndef INET
+	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
+	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
+		return (EAFNOSUPPORT);
+#endif
+
+#ifndef INET6
+	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
+	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
+		return (EAFNOSUPPORT);
+#endif
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
+		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
+			sc->vxl_vni = vxlp->vxlp_vni;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
+		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
+		sc->vxl_src_addr.in4.sin_family = AF_INET;
+		sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_in4;
+	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
+		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
+		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
+		sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_in6;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
+		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
+		sc->vxl_dst_addr.in4.sin_family = AF_INET;
+		sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_in4;
+	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
+		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
+		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
+		sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_in6;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
+		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
+		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
+		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
+			sc->vxl_min_port = vxlp->vxlp_min_port;
+			sc->vxl_max_port = vxlp->vxlp_max_port;
+		}
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
+		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
+		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
+			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
+		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
+			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
+		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
+			sc->vxl_ttl = vxlp->vxlp_ttl;
+	}
+
+	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
+		if (vxlp->vxlp_learn == 0)
+			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
+	}
+
+	return (0);
+}
+
+/*
+ * Clone-create method: allocate and initialize a softc, apply any user
+ * parameters copied in from 'params', and attach the Ethernet
+ * interface with a randomly generated locally-administered MAC.
+ */
+static int
+vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct vxlan_softc *sc;
+	struct ifnet *ifp;
+	struct ifvxlanparam vxlp;
+	int error;
+
+	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
+	sc->vxl_unit = unit;
+	vxlan_set_default_config(sc);
+
+	/* params is a pointer; compare against NULL, not 0. */
+	if (params != NULL) {
+		error = copyin(params, &vxlp, sizeof(vxlp));
+		if (error)
+			goto fail;
+
+		error = vxlan_set_user_config(sc, &vxlp);
+		if (error)
+			goto fail;
+	}
+
+	ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		error = ENOSPC;
+		goto fail;
+	}
+
+	sc->vxl_ifp = ifp;
+	rm_init(&sc->vxl_lock, "vxlanrm");
+	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
+	sc->vxl_port_hash_key = arc4random();
+	vxlan_ftable_init(sc);
+
+	vxlan_sysctl_setup(sc);
+
+	ifp->if_softc = sc;
+	if_initname(ifp, vxlan_name, unit);
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_init = vxlan_init;
+	ifp->if_ioctl = vxlan_ioctl;
+	ifp->if_transmit = vxlan_transmit;
+	ifp->if_qflush = vxlan_qflush;
+
+	vxlan_fakeaddr(sc);
+	ether_ifattach(ifp, sc->vxl_hwaddr);
+
+	ifp->if_baudrate = 0;
+	/* if_hdrlen is recomputed in vxlan_setup_interface() at init. */
+	ifp->if_hdrlen = 0;
+
+	return (0);
+
+fail:
+	free(sc, M_VXLAN);
+	return (error);
+}
+
+/*
+ * Clone-destroy method: tear down the interface, flush all forwarding
+ * entries, detach from the network stack, and free the softc.
+ */
+static void
+vxlan_clone_destroy(struct ifnet *ifp)
+{
+	struct vxlan_softc *sc;
+
+	sc = ifp->if_softc;
+
+	vxlan_teardown(sc);
+
+	vxlan_ftable_flush(sc, 1);
+
+	ether_ifdetach(ifp);
+	if_free(ifp);
+
+	vxlan_ftable_fini(sc);
+
+	vxlan_sysctl_destroy(sc);
+	rm_destroy(&sc->vxl_lock);
+	free(sc, M_VXLAN);
+}
+
+/* BMV: Taken from if_bridge. */
+/*
+ * Hash a MAC address into a forwarding-table bucket index, keyed with
+ * the per-softc vxl_ftable_hash_key so bucket distribution cannot be
+ * predicted by remote hosts.
+ */
+static uint32_t
+vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
+{
+	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
+
+	b += addr[5] << 8;
+	b += addr[4];
+	a += addr[3] << 24;
+	a += addr[2] << 16;
+	a += addr[1] << 8;
+	a += addr[0];
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ */
+#define	mix(a, b, c)							\
+do {									\
+	a -= b; a -= c; a ^= (c >> 13);					\
+	b -= c; b -= a; b ^= (a << 8);					\
+	c -= a; c -= b; c ^= (b >> 13);					\
+	a -= b; a -= c; a ^= (c >> 12);					\
+	b -= c; b -= a; b ^= (a << 16);					\
+	c -= a; c -= b; c ^= (b >> 5);					\
+	a -= b; a -= c; a ^= (c >> 3);					\
+	b -= c; b -= a; b ^= (a << 10);					\
+	c -= a; c -= b; c ^= (b >> 15);					\
+} while (0)
+
+	mix(a, b, c);
+
+#undef mix
+
+	return (c);
+}
+
+/*
+ * Generate a random MAC for the interface with the multicast bit
+ * cleared and the locally-administered bit set.
+ */
+static void
+vxlan_fakeaddr(struct vxlan_softc *sc)
+{
+
+	/*
+	 * Generate a non-multicast, locally administered address.
+	 *
+	 * BMV: Should we use the FreeBSD OUI range instead?
+	 */
+	arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1);
+	sc->vxl_hwaddr[0] &= ~1;	/* clear multicast/group bit */
+	sc->vxl_hwaddr[0] |= 2;		/* set locally-administered bit */
+}
+
+/*
+ * Compare a tunnel sockaddr against 'sa'; returns zero when the first
+ * sa_len bytes match (bcmp semantics).
+ */
+static int
+vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+	size_t len;
+
+	len = vxladdr->sa.sa_len;
+	return (bcmp(&vxladdr->sa, sa, len));
+}
+
+/*
+ * Copy an AF_INET or AF_INET6 sockaddr into the union, zeroing it first
+ * and normalizing sin_len/sin6_len.
+ */
+static void
+vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+
+	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+	bzero(vxladdr, sizeof(*vxladdr));
+
+	if (sa->sa_family == AF_INET) {
+		vxladdr->in4 = *satoconstsin(sa);
+		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
+	} else if (sa->sa_family == AF_INET6) {
+		vxladdr->in6 = *satoconstsin6(sa);
+		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
+	}
+}
+
+/*
+ * Return nonzero when the IP address in 'sa' equals the address stored
+ * in the union; families other than INET/INET6 never match.
+ */
+static int
+vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+	int equal;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		equal = satoconstsin(sa)->sin_addr.s_addr ==
+		    vxladdr->in4.sin_addr.s_addr;
+		break;
+	case AF_INET6:
+		equal = IN6_ARE_ADDR_EQUAL(&satoconstsin6(sa)->sin6_addr,
+		    &vxladdr->in6.sin6_addr);
+		break;
+	default:
+		equal = 0;
+		break;
+	}
+
+	return (equal);
+}
+
+/*
+ * Copy only the address portion (family, length, IP address) of 'sa'
+ * into the union, leaving the existing port untouched.
+ */
+static void
+vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
+    const struct sockaddr *sa)
+{
+
+	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+
+	if (sa->sa_family == AF_INET) {
+		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
+		vxladdr->in4.sin_family = AF_INET;
+		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
+		vxladdr->in4.sin_addr = *in4;
+	} else if (sa->sa_family == AF_INET6) {
+		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
+		vxladdr->in6.sin6_family = AF_INET6;
+		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
+		vxladdr->in6.sin6_addr = *in6;
+	}
+}
+
+/*
+ * Return nonzero when the address family is compiled into this kernel;
+ * AF_UNSPEC is accepted only when 'unspec' is set.
+ */
+static int
+vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
+{
+	int supported;
+
+	supported = 0;
+
+	switch (vxladdr->sa.sa_family) {
+	case AF_UNSPEC:
+		if (unspec != 0)
+			supported = 1;
+		break;
+	case AF_INET:
+#ifdef INET
+		supported = 1;
+#endif
+		break;
+	case AF_INET6:
+#ifdef INET6
+		supported = 1;
+#endif
+		break;
+	default:
+		break;
+	}
+
+	return (supported);
+}
+
+/*
+ * Return nonzero when the stored address is the wildcard (INADDR_ANY or
+ * the IPv6 unspecified address); -1 for unknown families.
+ */
+static int
+vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
+{
+	const struct sockaddr *sa;
+	int any;
+
+	sa = &vxladdr->sa;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		any = satoconstsin(sa)->sin_addr.s_addr == INADDR_ANY;
+		break;
+	case AF_INET6:
+		any = IN6_IS_ADDR_UNSPECIFIED(&satoconstsin6(sa)->sin6_addr);
+		break;
+	default:
+		any = -1;
+		break;
+	}
+
+	return (any);
+}
+
+/*
+ * Return nonzero when the stored address is an IPv4 or IPv6 multicast
+ * group address; -1 for unknown families.
+ */
+static int
+vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
+{
+	const struct sockaddr *sa;
+	int mc;
+
+	sa = &vxladdr->sa;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		mc = IN_MULTICAST(ntohl(satoconstsin(sa)->sin_addr.s_addr));
+		break;
+	case AF_INET6:
+		mc = IN6_IS_ADDR_MULTICAST(&satoconstsin6(sa)->sin6_addr);
+		break;
+	default:
+		mc = -1;
+		break;
+	}
+
+	return (mc);
+}
+
+/*
+ * Return nonzero when immutable configuration may be changed: the
+ * interface must be down and not in the middle of init or teardown.
+ * Called with the softc lock held.
+ */
+static int
+vxlan_can_change_config(struct vxlan_softc *sc)
+{
+	struct ifnet *ifp;
+
+	ifp = sc->vxl_ifp;
+	VXLAN_LOCK_ASSERT(sc);
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 ||
+	    (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) != 0)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Return nonzero when the VNI falls outside the 24-bit identifier space.
+ */
+static int
+vxlan_check_vni(uint32_t vni)
+{
+
+	if (vni < VXLAN_VNI_MAX)
+		return (0);
+	return (1);
+}
+
+/*
+ * Return nonzero when the TTL exceeds the IP maximum.
+ */
+static int
+vxlan_check_ttl(int ttl)
+{
+
+	if (ttl <= MAXTTL)
+		return (0);
+	return (1);
+}
+
+/*
+ * Return nonzero when the forwarding-table timeout is out of range.
+ */
+static int
+vxlan_check_ftable_timeout(uint32_t timeout)
+{
+
+	if (timeout <= VXLAN_FTABLE_MAX_TIMEOUT)
+		return (0);
+	return (1);
+}
+
+/*
+ * Return nonzero when the forwarding-table size limit is out of range.
+ */
+static int
+vxlan_check_ftable_max(uint32_t max)
+{
+
+	if (max <= VXLAN_FTABLE_MAX)
+		return (0);
+	return (1);
+}
+
+/*
+ * Create the per-interface sysctl tree under net.link.vxlan.<unit>:
+ * forwarding-table limits and a dump handler, plus statistics.
+ * Note: description strings previously misspelled "forwarding".
+ */
+static void
+vxlan_sysctl_setup(struct vxlan_softc *sc)
+{
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *node;
+	struct vxlan_statistics *stats;
+	char namebuf[8];
+
+	ctx = &sc->vxl_sysctl_ctx;
+	stats = &sc->vxl_stats;
+	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
+
+	sysctl_ctx_init(ctx);
+	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
+	    CTLFLAG_RD, NULL, "");
+
+	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
+	    OID_AUTO, "ftable", CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
+	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
+	    "Number of entries in forwarding table");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
+	     CTLFLAG_RD, &sc->vxl_ftable_max, 0,
+	    "Maximum number of entries allowed in forwarding table");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
+	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
+	    "Number of seconds between prunes of the forwarding table");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
+	    sc, 0, vxlan_ftable_sysctl_dump, "A",
+	    "Dump the forwarding table entries");
+
+	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
+	    OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
+	    "Forwarding table reached maximum entries");
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
+	    &stats->ftable_lock_upgrade_failed, 0,
+	    "Forwarding table update required lock upgrade");
+}
+
+/*
+ * Free the per-interface sysctl tree created by vxlan_sysctl_setup().
+ */
+static void
+vxlan_sysctl_destroy(struct vxlan_softc *sc)
+{
+
+	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
+	sc->vxl_sysctl_node = NULL;
+}
+
+/*
+ * Fetch the per-unit loader tunable net.link.vxlan.<unit>.<knob>,
+ * returning 'def' when the tunable is not set.
+ */
+static int
+vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
+{
+	char path[64];
+	int value;
+
+	value = def;
+	snprintf(path, sizeof(path), "net.link.vxlan.%d.%s", sc->vxl_unit,
+	    knob);
+	TUNABLE_INT_FETCH(path, &value);
+
+	return (value);
+}
+
+/*
+ * ifnet departure handler: collect every vxlan interface whose
+ * multicast interface is going away (under the socket-list lock), then
+ * tear each one down outside that lock.
+ */
+static void
+vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
+{
+	struct vxlan_softc_head list;
+	struct vxlan_socket *vso;
+	struct vxlan_softc *sc, *tsc;
+
+	LIST_INIT(&list);
+
+	if (ifp->if_flags & IFF_RENAMING)
+		return;
+	if ((ifp->if_flags & IFF_MULTICAST) == 0)
+		return;
+
+	mtx_lock(&vxlan_list_mtx);
+	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
+		vxlan_socket_ifdetach(vso, ifp, &list);
+	mtx_unlock(&vxlan_list_mtx);
+
+	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
+		LIST_REMOVE(sc, vxl_ifdetach_list);
+
+		VXLAN_WLOCK(sc);
+		/* vxlan_ifdetach() already set VXLAN_FLAG_TEARDOWN for us. */
+		if (sc->vxl_flags & VXLAN_FLAG_INIT)
+			vxlan_init_wait(sc);
+		vxlan_teardown_locked(sc);
+	}
+}
+
+/*
+ * Module load: initialize global state, register the interface-departure
+ * event handler, and attach the interface cloner.
+ */
+static void
+vxlan_load(void)
+{
+
+	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
+	LIST_INIT(&vxlan_socket_list);
+	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
+	vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
+	    vxlan_clone_destroy, 0);
+}
+
+/*
+ * Module unload: undo vxlan_load().  Detaching the cloner destroys all
+ * remaining interfaces, so the socket list must be empty afterwards.
+ */
+static void
+vxlan_unload(void)
+{
+
+	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+	    vxlan_ifdetach_event_tag);
+	if_clone_detach(vxlan_cloner);
+	mtx_destroy(&vxlan_list_mtx);
+	MPASS(LIST_EMPTY(&vxlan_socket_list));
+}
+
+/*
+ * Module event handler: dispatch load/unload; all other events are
+ * unsupported.
+ */
+static int
+vxlan_modevent(module_t mod, int type, void *unused)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		vxlan_load();
+		return (0);
+	case MOD_UNLOAD:
+		vxlan_unload();
+		return (0);
+	default:
+		return (ENOTSUP);
+	}
+}
+
+/* Module glue: register the if_vxlan pseudo-interface driver. */
+static moduledata_t vxlan_mod = {
+	"if_vxlan",
+	vxlan_modevent,
+	0
+};
+
+DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_vxlan, 1);
Property changes on: trunk/sys/net/if_vxlan.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/if_vxlan.h
===================================================================
--- trunk/sys/net/if_vxlan.h (rev 0)
+++ trunk/sys/net/if_vxlan.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,149 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014, Bryan Venteicher <bryanv at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/if_vxlan.h 284365 2015-06-14 03:14:45Z bryanv $
+ */
+
+#ifndef _NET_IF_VXLAN_H_
+#define _NET_IF_VXLAN_H_
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+
+struct vxlan_header {
+ uint32_t vxlh_flags;
+ uint32_t vxlh_vni;
+};
+
+#define VXLAN_HDR_FLAGS_VALID_VNI 0x08000000
+#define VXLAN_HDR_VNI_SHIFT 8
+
+#define VXLAN_VNI_MAX (1 << 24)
+#define VXLAN_VNI_MASK (VXLAN_VNI_MAX - 1)
+
+/*
+ * The port assigned by IANA is 4789, but some early implementations
+ * (like Linux) use 8472 instead. If not specified, we default to
+ * the IANA port.
+ */
+#define VXLAN_PORT 4789
+#define VXLAN_LEGACY_PORT 8472
+
+struct ifvxlanparam {
+ uint64_t vxlp_with;
+
+#define VXLAN_PARAM_WITH_VNI 0x0001
+#define VXLAN_PARAM_WITH_LOCAL_ADDR4 0x0002
+#define VXLAN_PARAM_WITH_LOCAL_ADDR6 0x0004
+#define VXLAN_PARAM_WITH_REMOTE_ADDR4 0x0008
+#define VXLAN_PARAM_WITH_REMOTE_ADDR6 0x0010
+#define VXLAN_PARAM_WITH_LOCAL_PORT 0x0020
+#define VXLAN_PARAM_WITH_REMOTE_PORT 0x0040
+#define VXLAN_PARAM_WITH_PORT_RANGE 0x0080
+#define VXLAN_PARAM_WITH_FTABLE_TIMEOUT 0x0100
+#define VXLAN_PARAM_WITH_FTABLE_MAX 0x0200
+#define VXLAN_PARAM_WITH_MULTICAST_IF 0x0400
+#define VXLAN_PARAM_WITH_TTL 0x0800
+#define VXLAN_PARAM_WITH_LEARN 0x1000
+
+ uint32_t vxlp_vni;
+ struct in_addr vxlp_local_in4;
+ struct in6_addr vxlp_local_in6;
+ struct in_addr vxlp_remote_in4;
+ struct in6_addr vxlp_remote_in6;
+ uint16_t vxlp_local_port;
+ uint16_t vxlp_remote_port;
+ uint16_t vxlp_min_port;
+ uint16_t vxlp_max_port;
+ char vxlp_mc_ifname[IFNAMSIZ];
+ uint32_t vxlp_ftable_timeout;
+ uint32_t vxlp_ftable_max;
+ uint8_t vxlp_ttl;
+ uint8_t vxlp_learn;
+};
+
+union vxlan_sockaddr {
+ struct sockaddr sa;
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+};
+
+#define VXLAN_SOCKADDR_IS_IPV4(_vxsin) ((_vxsin)->sa.sa_family == AF_INET)
+#define VXLAN_SOCKADDR_IS_IPV6(_vxsin) ((_vxsin)->sa.sa_family == AF_INET6)
+#define VXLAN_SOCKADDR_IS_IPV46(_vxsin) \
+ (VXLAN_SOCKADDR_IS_IPV4(_vxsin) || VXLAN_SOCKADDR_IS_IPV6(_vxsin))
+
+#define VXLAN_CMD_GET_CONFIG 0
+#define VXLAN_CMD_SET_VNI 1
+#define VXLAN_CMD_SET_LOCAL_ADDR 2
+#define VXLAN_CMD_SET_REMOTE_ADDR 4
+#define VXLAN_CMD_SET_LOCAL_PORT 5
+#define VXLAN_CMD_SET_REMOTE_PORT 6
+#define VXLAN_CMD_SET_PORT_RANGE 7
+#define VXLAN_CMD_SET_FTABLE_TIMEOUT 8
+#define VXLAN_CMD_SET_FTABLE_MAX 9
+#define VXLAN_CMD_SET_MULTICAST_IF 10
+#define VXLAN_CMD_SET_TTL 11
+#define VXLAN_CMD_SET_LEARN 12
+#define VXLAN_CMD_FTABLE_ENTRY_ADD 13
+#define VXLAN_CMD_FTABLE_ENTRY_REM 14
+#define VXLAN_CMD_FLUSH 15
+
+struct ifvxlancfg {
+ uint32_t vxlc_vni;
+ union vxlan_sockaddr vxlc_local_sa;
+ union vxlan_sockaddr vxlc_remote_sa;
+ uint32_t vxlc_mc_ifindex;
+ uint32_t vxlc_ftable_cnt;
+ uint32_t vxlc_ftable_max;
+ uint32_t vxlc_ftable_timeout;
+ uint16_t vxlc_port_min;
+ uint16_t vxlc_port_max;
+ uint8_t vxlc_learn;
+ uint8_t vxlc_ttl;
+};
+
+struct ifvxlancmd {
+ uint32_t vxlcmd_flags;
+#define VXLAN_CMD_FLAG_FLUSH_ALL 0x0001
+#define VXLAN_CMD_FLAG_LEARN 0x0002
+
+ uint32_t vxlcmd_vni;
+ uint32_t vxlcmd_ftable_timeout;
+ uint32_t vxlcmd_ftable_max;
+ uint16_t vxlcmd_port;
+ uint16_t vxlcmd_port_min;
+ uint16_t vxlcmd_port_max;
+ uint8_t vxlcmd_mac[ETHER_ADDR_LEN];
+ uint8_t vxlcmd_ttl;
+ union vxlan_sockaddr vxlcmd_sa;
+ char vxlcmd_ifname[IFNAMSIZ];
+};
+
+#endif /* _NET_IF_VXLAN_H_ */
Property changes on: trunk/sys/net/if_vxlan.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/iso88025.h
===================================================================
--- trunk/sys/net/iso88025.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/iso88025.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -31,7 +31,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/iso88025.h 194581 2009-06-21 10:29:31Z rdivacky $
+ * $FreeBSD: stable/10/sys/net/iso88025.h 264299 2014-04-09 11:15:50Z glebius $
*
* Information gathered from tokenring at freebsd, /sys/net/ethernet.h and
* the Mach token ring driver.
@@ -163,11 +163,13 @@
#define ISO88025_BPF_UNSUPPORTED 0
#define ISO88025_BPF_SUPPORTED 1
+#ifdef _KERNEL
void iso88025_ifattach (struct ifnet *, const u_int8_t *, int);
void iso88025_ifdetach (struct ifnet *, int);
int iso88025_ioctl (struct ifnet *, u_long, caddr_t );
-int iso88025_output (struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
+int iso88025_output (struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
void iso88025_input (struct ifnet *, struct mbuf *);
+#endif /* _KERNEL */
-#endif
+#endif /* !_NET_ISO88025_H_ */
Added: trunk/sys/net/mppc.h
===================================================================
--- trunk/sys/net/mppc.h (rev 0)
+++ trunk/sys/net/mppc.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,63 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2007 Alexander Motin <mav at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/mppc.h 302061 2016-06-21 15:47:54Z pfg $
+ */
+
+/*
+ * MPPC compression and decompression library.
+ * Version 1.0
+ *
+ * Note that Hi/Fn (later acquired by Exar Corporation) held US patents
+ * on some implementation-critical aspects of MPPC compression.
+ * These patents lapsed due to non-payment of fees in 2007 and by 2015
+ * expired altogether.
+ */
+
+#ifndef _NET_MPPC_H_
+#define _NET_MPPC_H_
+
+#define MPPC_MANDATORY_COMPRESS_FLAGS 0
+#define MPPC_MANDATORY_DECOMPRESS_FLAGS 0
+
+#define MPPC_SAVE_HISTORY 1
+
+#define MPPC_OK 5
+#define MPPC_EXPANDED 8
+#define MPPC_RESTART_HISTORY 16
+#define MPPC_DEST_EXHAUSTED 32
+
+extern size_t MPPC_SizeOfCompressionHistory(void);
+extern size_t MPPC_SizeOfDecompressionHistory(void);
+
+extern void MPPC_InitCompressionHistory(char *history);
+extern void MPPC_InitDecompressionHistory(char *history);
+
+extern int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef);
+extern int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags);
+
+#endif
Property changes on: trunk/sys/net/mppc.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/mppcc.c
===================================================================
--- trunk/sys/net/mppcc.c (rev 0)
+++ trunk/sys/net/mppcc.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,300 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2002-2004 Jan Dubiec <jdx at slackware.pl>
+ * Copyright (c) 2007 Alexander Motin <mav at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/mppcc.c 302774 2016-07-13 16:20:18Z pfg $
+ */
+
+/*
+ * MPPC compression library.
+ * Version 1.0
+ *
+ * Note that Hi/Fn (later acquired by Exar Corporation) held US patents
+ * on some implementation-critical aspects of MPPC compression.
+ * These patents lapsed due to non-payment of fees in 2007 and by 2015
+ * expired altogether.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <net/mppc.h>
+
+#define MPPE_HIST_LEN 8192
+
+#define HASH(x) (((40543*(((((x)[0]<<4)^(x)[1])<<4)^(x)[2]))>>4) & 0x1fff)
+
+struct MPPC_comp_state {
+ uint8_t hist[2*MPPE_HIST_LEN];
+ uint16_t histptr;
+ uint16_t hash[MPPE_HIST_LEN];
+};
+
+/* Inserts 1 to 8 bits into the output buffer. */
+static void __inline
+putbits8(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l)
+{
+ buf += *i;
+ if (*l >= n) {
+ *l = (*l) - n;
+ val <<= *l;
+ *buf = *buf | (val & 0xff);
+ if (*l == 0) {
+ *l = 8;
+ (*i)++;
+ *(++buf) = 0;
+ }
+ } else {
+ (*i)++;
+ *l = 8 - n + (*l);
+ val <<= *l;
+ *buf = *buf | ((val >> 8) & 0xff);
+ *(++buf) = val & 0xff;
+ }
+}
+
+/* Inserts 9 to 16 bits into the output buffer. */
+static void __inline
+putbits16(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l)
+{
+ buf += *i;
+ if (*l >= n - 8) {
+ (*i)++;
+ *l = 8 - n + (*l);
+ val <<= *l;
+ *buf = *buf | ((val >> 8) & 0xff);
+ *(++buf) = val & 0xff;
+ if (*l == 0) {
+ *l = 8;
+ (*i)++;
+ *(++buf) = 0;
+ }
+ } else {
+ (*i)++; (*i)++;
+ *l = 16 - n + (*l);
+ val <<= *l;
+ *buf = *buf | ((val >> 16) & 0xff);
+ *(++buf) = (val >> 8) & 0xff;
+ *(++buf) = val & 0xff;
+ }
+}
+
+/* Inserts 17 to 24 bits into the output buffer. */
+static void __inline
+putbits24(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l)
+{
+ buf += *i;
+ if (*l >= n - 16) {
+ (*i)++; (*i)++;
+ *l = 16 - n + (*l);
+ val <<= *l;
+ *buf = *buf | ((val >> 16) & 0xff);
+ *(++buf) = (val >> 8) & 0xff;
+ *(++buf) = val & 0xff;
+ if (*l == 0) {
+ *l = 8;
+ (*i)++;
+ *(++buf) = 0;
+ }
+ } else {
+ (*i)++; (*i)++; (*i)++;
+ *l = 24 - n + (*l);
+ val <<= *l;
+ *buf = *buf | ((val >> 24) & 0xff);
+ *(++buf) = (val >> 16) & 0xff;
+ *(++buf) = (val >> 8) & 0xff;
+ *(++buf) = val & 0xff;
+ }
+}
+
+size_t MPPC_SizeOfCompressionHistory(void)
+{
+ return (sizeof(struct MPPC_comp_state));
+}
+
+void MPPC_InitCompressionHistory(char *history)
+{
+ struct MPPC_comp_state *state = (struct MPPC_comp_state*)history;
+
+ bzero(history, sizeof(struct MPPC_comp_state));
+ state->histptr = MPPE_HIST_LEN;
+}
+
+int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef)
+{
+ struct MPPC_comp_state *state = (struct MPPC_comp_state*)history;
+ uint32_t olen, off, len, idx, i, l;
+ uint8_t *hist, *sbuf, *p, *q, *r, *s;
+ int rtn = MPPC_OK;
+
+ /*
+ * At this point, to avoid possible buffer overflow caused by packet
+ * expansion during/after compression, we should make sure we have
+ * space for the worst case.
+
+ * Maximum MPPC packet expansion is 12.5%. This is the worst case when
+ * all octets in the input buffer are >= 0x80 and we cannot find any
+ * repeated tokens.
+ */
+ if (*dstCnt < (*srcCnt * 9 / 8 + 2)) {
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+
+ /* We can't compress more than MPPE_HIST_LEN bytes in a call. */
+ if (*srcCnt > MPPE_HIST_LEN) {
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+
+ hist = state->hist + MPPE_HIST_LEN;
+ /* check if there is enough room at the end of the history */
+ if (state->histptr + *srcCnt >= 2*MPPE_HIST_LEN) {
+ rtn |= MPPC_RESTART_HISTORY;
+ state->histptr = MPPE_HIST_LEN;
+ memcpy(state->hist, hist, MPPE_HIST_LEN);
+ }
+ /* Add packet to the history. */
+ sbuf = state->hist + state->histptr;
+ memcpy(sbuf, *src, *srcCnt);
+ state->histptr += *srcCnt;
+
+ /* compress data */
+ r = sbuf + *srcCnt;
+ **dst = olen = i = 0;
+ l = 8;
+ while (i < *srcCnt - 2) {
+ s = q = sbuf + i;
+
+ /* Prognose matching position using hash function. */
+ idx = HASH(s);
+ p = hist + state->hash[idx];
+ state->hash[idx] = (uint16_t) (s - hist);
+ if (p > s) /* It was before MPPC_RESTART_HISTORY. */
+ p -= MPPE_HIST_LEN; /* Try previous history buffer. */
+ off = s - p;
+
+ /* Check our prognosis. */
+ if (off > MPPE_HIST_LEN - 1 || off < 1 || *p++ != *s++ ||
+ *p++ != *s++ || *p++ != *s++) {
+ /* No match found; encode literal byte. */
+ if ((*src)[i] < 0x80) { /* literal byte < 0x80 */
+ putbits8(*dst, (uint32_t) (*src)[i], 8, &olen, &l);
+ } else { /* literal byte >= 0x80 */
+ putbits16(*dst, (uint32_t) (0x100|((*src)[i]&0x7f)), 9,
+ &olen, &l);
+ }
+ ++i;
+ continue;
+ }
+
+ /* Find length of the matching fragment */
+#if defined(__amd64__) || defined(__i386__)
+ /* Optimization for CPUs without strict data alignment requirements */
+ while ((*((uint32_t*)p) == *((uint32_t*)s)) && (s < (r - 3))) {
+ p+=4;
+ s+=4;
+ }
+#endif
+ while((*p++ == *s++) && (s <= r));
+ len = s - q - 1;
+ i += len;
+
+ /* At least 3 character match found; code data. */
+ /* Encode offset. */
+ if (off < 64) { /* 10-bit offset; 0 <= offset < 64 */
+ putbits16(*dst, 0x3c0|off, 10, &olen, &l);
+ } else if (off < 320) { /* 12-bit offset; 64 <= offset < 320 */
+ putbits16(*dst, 0xe00|(off-64), 12, &olen, &l);
+ } else if (off < 8192) { /* 16-bit offset; 320 <= offset < 8192 */
+ putbits16(*dst, 0xc000|(off-320), 16, &olen, &l);
+ } else { /* NOTREACHED */
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+
+ /* Encode length of match. */
+ if (len < 4) { /* length = 3 */
+ putbits8(*dst, 0, 1, &olen, &l);
+ } else if (len < 8) { /* 4 <= length < 8 */
+ putbits8(*dst, 0x08|(len&0x03), 4, &olen, &l);
+ } else if (len < 16) { /* 8 <= length < 16 */
+ putbits8(*dst, 0x30|(len&0x07), 6, &olen, &l);
+ } else if (len < 32) { /* 16 <= length < 32 */
+ putbits8(*dst, 0xe0|(len&0x0f), 8, &olen, &l);
+ } else if (len < 64) { /* 32 <= length < 64 */
+ putbits16(*dst, 0x3c0|(len&0x1f), 10, &olen, &l);
+ } else if (len < 128) { /* 64 <= length < 128 */
+ putbits16(*dst, 0xf80|(len&0x3f), 12, &olen, &l);
+ } else if (len < 256) { /* 128 <= length < 256 */
+ putbits16(*dst, 0x3f00|(len&0x7f), 14, &olen, &l);
+ } else if (len < 512) { /* 256 <= length < 512 */
+ putbits16(*dst, 0xfe00|(len&0xff), 16, &olen, &l);
+ } else if (len < 1024) { /* 512 <= length < 1024 */
+ putbits24(*dst, 0x3fc00|(len&0x1ff), 18, &olen, &l);
+ } else if (len < 2048) { /* 1024 <= length < 2048 */
+ putbits24(*dst, 0xff800|(len&0x3ff), 20, &olen, &l);
+ } else if (len < 4096) { /* 2048 <= length < 4096 */
+ putbits24(*dst, 0x3ff000|(len&0x7ff), 22, &olen, &l);
+ } else if (len < 8192) { /* 4096 <= length < 8192 */
+ putbits24(*dst, 0xffe000|(len&0xfff), 24, &olen, &l);
+ } else { /* NOTREACHED */
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+ }
+
+ /* Add remaining octets to the output. */
+ while(*srcCnt - i > 0) {
+ if ((*src)[i] < 0x80) { /* literal byte < 0x80 */
+ putbits8(*dst, (uint32_t) (*src)[i++], 8, &olen, &l);
+ } else { /* literal byte >= 0x80 */
+ putbits16(*dst, (uint32_t) (0x100|((*src)[i++]&0x7f)), 9, &olen,
+ &l);
+ }
+ }
+
+ /* Reset unused bits of the last output octet. */
+ if ((l != 0) && (l != 8)) {
+ putbits8(*dst, 0, l, &olen, &l);
+ }
+
+ /* If result is bigger than the original, set flag and flush history. */
+ if ((*srcCnt < olen) || ((flags & MPPC_SAVE_HISTORY) == 0)) {
+ if (*srcCnt < olen)
+ rtn |= MPPC_EXPANDED;
+ bzero(history, sizeof(struct MPPC_comp_state));
+ state->histptr = MPPE_HIST_LEN;
+ }
+
+ *src += *srcCnt;
+ *srcCnt = 0;
+ *dst += olen;
+ *dstCnt -= olen;
+
+ return (rtn);
+}
Property changes on: trunk/sys/net/mppcc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/mppcd.c
===================================================================
--- trunk/sys/net/mppcd.c (rev 0)
+++ trunk/sys/net/mppcd.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,285 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2002-2004 Jan Dubiec <jdx at slackware.pl>
+ * Copyright (c) 2007 Alexander Motin <mav at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/mppcd.c 302774 2016-07-13 16:20:18Z pfg $
+ */
+
+/*
+ * MPPC decompression library.
+ * Version 1.0
+ *
+ * Note that Hi/Fn (later acquired by Exar Corporation) held US patents
+ * on some implementation-critical aspects of MPPC compression.
+ * These patents lapsed due to non-payment of fees in 2007 and by 2015
+ * expired altogether.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <net/mppc.h>
+
+#define MPPE_HIST_LEN 8192
+
+struct MPPC_decomp_state {
+ uint8_t hist[2*MPPE_HIST_LEN];
+ uint16_t histptr;
+};
+
+static uint32_t __inline
+getbits(const uint8_t *buf, const uint32_t n, uint32_t *i, uint32_t *l)
+{
+ static const uint32_t m[] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
+ uint32_t res, ol;
+
+ ol = *l;
+ if (*l >= n) {
+ *l = (*l) - n;
+ res = (buf[*i] & m[ol]) >> (*l);
+ if (*l == 0) {
+ *l = 8;
+ (*i)++;
+ }
+ } else {
+ *l = 8 - n + (*l);
+ res = (buf[(*i)++] & m[ol]) << 8;
+ res = (res | buf[*i]) >> (*l);
+ }
+
+ return (res);
+}
+
+static uint32_t __inline
+getbyte(const uint8_t *buf, const uint32_t i, const uint32_t l)
+{
+ if (l == 8) {
+ return (buf[i]);
+ } else {
+ return ((((buf[i] << 8) | buf[i+1]) >> l) & 0xff);
+ }
+}
+
+static void __inline
+lamecopy(uint8_t *dst, uint8_t *src, uint32_t len)
+{
+ while (len--)
+ *dst++ = *src++;
+}
+
+size_t MPPC_SizeOfDecompressionHistory(void)
+{
+ return (sizeof(struct MPPC_decomp_state));
+}
+
+void MPPC_InitDecompressionHistory(char *history)
+{
+ struct MPPC_decomp_state *state = (struct MPPC_decomp_state*)history;
+
+ bzero(history, sizeof(struct MPPC_decomp_state));
+ state->histptr = MPPE_HIST_LEN;
+}
+
+int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags)
+{
+ struct MPPC_decomp_state *state = (struct MPPC_decomp_state*)history;
+ uint32_t olen, off, len, bits, val, sig, i, l;
+ uint8_t *hist, *s;
+ u_char *isrc = *src;
+ int rtn = MPPC_OK;
+
+ if ((flags & MPPC_RESTART_HISTORY) != 0) {
+ memcpy(state->hist, state->hist + MPPE_HIST_LEN, MPPE_HIST_LEN);
+ state->histptr = MPPE_HIST_LEN;
+ }
+
+ hist = state->hist + state->histptr;
+ olen = len = i = 0;
+ l = 8;
+ bits = *srcCnt * 8;
+ while (bits >= 8) {
+ val = getbyte(isrc, i++, l);
+ if (val < 0x80) { /* literal byte < 0x80 */
+ if (state->histptr < 2*MPPE_HIST_LEN) {
+ /* Copy uncompressed byte to the history. */
+ (state->hist)[(state->histptr)++] = (uint8_t) val;
+ } else {
+ /* Buffer overflow; drop packet. */
+ rtn &= ~MPPC_OK;
+ return rtn;
+ }
+ olen++;
+ bits -= 8;
+ continue;
+ }
+
+ sig = val & 0xc0;
+ if (sig == 0x80) { /* literal byte >= 0x80 */
+ if (state->histptr < 2*MPPE_HIST_LEN) {
+ /* Copy uncompressed byte to the history. */
+ (state->hist)[(state->histptr)++] =
+ (uint8_t) (0x80|((val&0x3f)<<1)|getbits(isrc, 1 , &i ,&l));
+ } else {
+ /* buffer overflow; drop packet */
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+ olen++;
+ bits -= 9;
+ continue;
+ }
+
+ /* Not a literal byte so it must be an (offset,length) pair */
+ /* decode offset */
+ sig = val & 0xf0;
+ if (sig == 0xf0) { /* 10-bit offset; 0 <= offset < 64 */
+ off = (((val&0x0f)<<2)|getbits(isrc, 2 , &i ,&l));
+ bits -= 10;
+ } else {
+ if (sig == 0xe0) { /* 12-bit offset; 64 <= offset < 320 */
+ off = ((((val&0x0f)<<4)|getbits(isrc, 4 , &i ,&l))+64);
+ bits -= 12;
+ } else {
+ if ((sig&0xe0) == 0xc0) {/* 16-bit offset; 320 <= offset < 8192 */
+ off = ((((val&0x1f)<<8)|getbyte(isrc, i++, l))+320);
+ bits -= 16;
+ if (off > MPPE_HIST_LEN - 1) {
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+ } else { /* This shouldn't happen. */
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+ }
+ }
+ /* Decode length of match. */
+ val = getbyte(isrc, i, l);
+ if ((val & 0x80) == 0x00) { /* len = 3 */
+ len = 3;
+ bits--;
+ getbits(isrc, 1 , &i ,&l);
+ } else if ((val & 0xc0) == 0x80) { /* 4 <= len < 8 */
+ len = 0x04 | ((val>>4) & 0x03);
+ bits -= 4;
+ getbits(isrc, 4 , &i ,&l);
+ } else if ((val & 0xe0) == 0xc0) { /* 8 <= len < 16 */
+ len = 0x08 | ((val>>2) & 0x07);
+ bits -= 6;
+ getbits(isrc, 6 , &i ,&l);
+ } else if ((val & 0xf0) == 0xe0) { /* 16 <= len < 32 */
+ len = 0x10 | (val & 0x0f);
+ bits -= 8;
+ i++;
+ } else {
+ bits -= 8;
+ val = (val << 8) | getbyte(isrc, ++i, l);
+ if ((val & 0xf800) == 0xf000) { /* 32 <= len < 64 */
+ len = 0x0020 | ((val >> 6) & 0x001f);
+ bits -= 2;
+ getbits(isrc, 2 , &i ,&l);
+ } else if ((val & 0xfc00) == 0xf800) { /* 64 <= len < 128 */
+ len = 0x0040 | ((val >> 4) & 0x003f);
+ bits -= 4;
+ getbits(isrc, 4 , &i ,&l);
+ } else if ((val & 0xfe00) == 0xfc00) { /* 128 <= len < 256 */
+ len = 0x0080 | ((val >> 2) & 0x007f);
+ bits -= 6;
+ getbits(isrc, 6 , &i ,&l);
+ } else if ((val & 0xff00) == 0xfe00) { /* 256 <= len < 512 */
+ len = 0x0100 | (val & 0x00ff);
+ bits -= 8;
+ i++;
+ } else {
+ bits -= 8;
+ val = (val << 8) | getbyte(isrc, ++i, l);
+ if ((val & 0xff8000) == 0xff0000) { /* 512 <= len < 1024 */
+ len = 0x000200 | ((val >> 6) & 0x0001ff);
+ bits -= 2;
+ getbits(isrc, 2 , &i ,&l);
+ } else if ((val & 0xffc000) == 0xff8000) {/* 1024 <= len < 2048 */
+ len = 0x000400 | ((val >> 4) & 0x0003ff);
+ bits -= 4;
+ getbits(isrc, 4 , &i ,&l);
+ } else if ((val & 0xffe000) == 0xffc000) {/* 2048 <= len < 4096 */
+ len = 0x000800 | ((val >> 2) & 0x0007ff);
+ bits -= 6;
+ getbits(isrc, 6 , &i ,&l);
+ } else if ((val & 0xfff000) == 0xffe000) {/* 4096 <= len < 8192 */
+ len = 0x001000 | (val & 0x000fff);
+ bits -= 8;
+ i++;
+ } else { /* NOTREACHED */
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+ }
+ }
+
+ s = state->hist + state->histptr;
+ state->histptr += len;
+ olen += len;
+ if (state->histptr < 2*MPPE_HIST_LEN) {
+ /* Copy uncompressed bytes to the history. */
+
+ /*
+ * In some cases len may be greater than off, which means the memory
+ * areas pointed to by s and s-off overlap. To decode that case the
+ * data must be copied strictly in order of increasing address, so
+ * that already-written bytes are repeated as intended.
+ */
+ lamecopy(s, s - off, len);
+ } else {
+ /* Buffer overflow; drop packet. */
+ rtn &= ~MPPC_OK;
+ return (rtn);
+ }
+ }
+
+ /* Do PFC decompression. */
+ len = olen;
+ if ((hist[0] & 0x01) != 0) {
+ (*dst)[0] = 0;
+ (*dst)++;
+ len++;
+ }
+
+ if (len <= *dstCnt) {
+ /* Copy uncompressed packet to the output buffer. */
+ memcpy(*dst, hist, olen);
+ } else {
+ /* Buffer overflow; drop packet. */
+ rtn |= MPPC_DEST_EXHAUSTED;
+ }
+
+ *src += *srcCnt;
+ *srcCnt = 0;
+ *dst += len;
+ *dstCnt -= len;
+
+ return (rtn);
+}
Property changes on: trunk/sys/net/mppcd.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/netisr.c
===================================================================
--- trunk/sys/net/netisr.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netisr.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/netisr.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/net/netisr.c 282832 2015-05-13 08:04:50Z hiren $");
/*
* netisr is a packet dispatch service, allowing synchronous (directly
@@ -155,25 +155,15 @@
"netisr dispatch policy");
/*
- * These sysctls were used in previous versions to control and export
- * dispatch policy state. Now, we provide read-only export via them so that
- * older netstat binaries work. At some point they can be garbage collected.
- */
-static int netisr_direct_force;
-SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD,
- &netisr_direct_force, 0, "compat: force direct dispatch");
-
-static int netisr_direct;
-SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0,
- "compat: enable direct dispatch");
-
-/*
* Allow the administrator to limit the number of threads (CPUs) to use for
* netisr. We don't check netisr_maxthreads before creating the thread for
- * CPU 0, so in practice we ignore values <= 1. This must be set at boot.
- * We will create at most one thread per CPU.
+ * CPU 0. This must be set at boot. We will create at most one thread per CPU.
+ * By default we initialize this to 1 which would assign just 1 cpu (cpu0) and
+ * therefore only 1 workstream. If set to -1, netisr would use all cpus
+ * (mp_ncpus) and therefore would have those many workstreams. One workstream
+ * per thread (CPU).
*/
-static int netisr_maxthreads = -1; /* Max number of threads. */
+static int netisr_maxthreads = 1; /* Max number of threads. */
TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
&netisr_maxthreads, 0,
@@ -339,32 +329,6 @@
return (EINVAL);
}
-static void
-netisr_dispatch_policy_compat(void)
-{
-
- switch (netisr_dispatch_policy) {
- case NETISR_DISPATCH_DEFERRED:
- netisr_direct_force = 0;
- netisr_direct = 0;
- break;
-
- case NETISR_DISPATCH_HYBRID:
- netisr_direct_force = 0;
- netisr_direct = 1;
- break;
-
- case NETISR_DISPATCH_DIRECT:
- netisr_direct_force = 1;
- netisr_direct = 1;
- break;
-
- default:
- panic("%s: unknown policy %u", __func__,
- netisr_dispatch_policy);
- }
-}
-
static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
@@ -380,10 +344,8 @@
&dispatch_policy);
if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
error = EINVAL;
- if (error == 0) {
+ if (error == 0)
netisr_dispatch_policy = dispatch_policy;
- netisr_dispatch_policy_compat();
- }
}
return (error);
}
@@ -728,12 +690,13 @@
}
if (policy == NETISR_POLICY_FLOW) {
- if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
+ npp->np_m2flow != NULL) {
m = npp->np_m2flow(m, source);
if (m == NULL)
return (NULL);
}
- if (m->m_flags & M_FLOWID) {
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
*cpuidp =
netisr_default_flow2cpu(m->m_pkthdr.flowid);
return (m);
@@ -1169,8 +1132,10 @@
KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
NETISR_LOCK_INIT();
- if (netisr_maxthreads < 1)
- netisr_maxthreads = 1;
+ if (netisr_maxthreads == 0 || netisr_maxthreads < -1 )
+ netisr_maxthreads = 1; /* default behavior */
+ else if (netisr_maxthreads == -1)
+ netisr_maxthreads = mp_ncpus; /* use max cpus */
if (netisr_maxthreads > mp_ncpus) {
printf("netisr_init: forcing maxthreads from %d to %d\n",
netisr_maxthreads, mp_ncpus);
@@ -1200,10 +1165,9 @@
&dispatch_policy);
if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
error = EINVAL;
- if (error == 0) {
+ if (error == 0)
netisr_dispatch_policy = dispatch_policy;
- netisr_dispatch_policy_compat();
- } else
+ else
printf(
"%s: invalid dispatch policy %s, using default\n",
__func__, tmp);
Modified: trunk/sys/net/netisr.h
===================================================================
--- trunk/sys/net/netisr.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netisr.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/netisr.h 222249 2011-05-24 12:34:19Z rwatson $
+ * $FreeBSD: stable/10/sys/net/netisr.h 222249 2011-05-24 12:34:19Z rwatson $
*/
#ifndef _NET_NETISR_H_
Modified: trunk/sys/net/netisr_internal.h
===================================================================
--- trunk/sys/net/netisr_internal.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netisr_internal.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/netisr_internal.h 222249 2011-05-24 12:34:19Z rwatson $
+ * $FreeBSD: stable/10/sys/net/netisr_internal.h 222249 2011-05-24 12:34:19Z rwatson $
*/
#ifndef _NET_NETISR_INTERNAL_H_
Modified: trunk/sys/net/netmap.h
===================================================================
--- trunk/sys/net/netmap.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netmap.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,244 +1,280 @@
/* $MidnightBSD$ */
/*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- *
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
- *
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the
- * distribution.
- *
- * 3. Neither the name of the authors nor the names of their contributors
- * may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
/*
- * $FreeBSD: stable/9/sys/net/netmap.h 247230 2013-02-24 18:26:17Z luigi $
- * $Id: netmap.h 11997 2013-01-17 21:59:12Z luigi $
+ * $FreeBSD: stable/10/sys/net/netmap.h 270292 2014-08-21 19:42:03Z np $
*
* Definitions of constants and the structures used by the netmap
* framework, for the part visible to both kernel and userspace.
* Detailed info on netmap is available with "man netmap" or at
- *
+ *
* http://info.iet.unipi.it/~luigi/netmap/
+ *
+ * This API is also used to communicate with the VALE software switch
*/
#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_
+#define NETMAP_API 11 /* current API version */
+
+#define NETMAP_MIN_API 11 /* min and max versions accepted */
+#define NETMAP_MAX_API 15
/*
+ * Some fields should be cache-aligned to reduce contention.
+ * The alignment is architecture and OS dependent, but rather than
+ * digging into OS headers to find the exact value we use an estimate
+ * that should cover most architectures.
+ */
+#define NM_CACHE_ALIGN 128
+
+/*
* --- Netmap data structures ---
*
- * The data structures used by netmap are shown below. Those in
- * capital letters are in an mmapp()ed area shared with userspace,
- * while others are private to the kernel.
- * Shared structures do not contain pointers but only memory
- * offsets, so that addressing is portable between kernel and userspace.
+ * The userspace data structures used by netmap are shown below.
+ * They are allocated by the kernel and mmap()ed by userspace threads.
+ * Pointers are implemented as memory offsets or indexes,
+ * so that they can be easily dereferenced in kernel and userspace.
+ KERNEL (opaque, obviously)
- softc
-+----------------+
-| standard fields|
-| if_pspare[0] ----------+
-+----------------+ |
- |
-+----------------+<------+
-|(netmap_adapter)|
-| | netmap_kring
-| tx_rings *--------------------------------->+---------------+
-| | netmap_kring | ring *---------.
-| rx_rings *--------->+---------------+ | nr_hwcur | |
-+----------------+ | ring *--------. | nr_hwavail | V
- | nr_hwcur | | | selinfo | |
- | nr_hwavail | | +---------------+ .
- | selinfo | | | ... | .
- +---------------+ | |(ntx+1 entries)|
- | .... | | | |
- |(nrx+1 entries)| | +---------------+
- | | |
- KERNEL +---------------+ |
- |
====================================================================
|
- USERSPACE | NETMAP_RING
- +---->+-------------+
- / | cur |
- NETMAP_IF (nifp, one per file desc.) / | avail |
- +---------------+ / | buf_ofs |
- | ni_tx_rings | / +=============+
- | ni_rx_rings | / | buf_idx | slot[0]
- | | / | len, flags |
- | | / +-------------+
- +===============+ / | buf_idx | slot[1]
- | txring_ofs[0] | (rel.to nifp)--' | len, flags |
- | txring_ofs[1] | +-------------+
- (num_rings+1 entries) (nr_num_slots entries)
- | txring_ofs[n] | | buf_idx | slot[n-1]
- +---------------+ | len, flags |
- | rxring_ofs[0] | +-------------+
+ USERSPACE | struct netmap_ring
+ +---->+---------------+
+ / | head,cur,tail |
+ struct netmap_if (nifp, 1 per fd) / | buf_ofs |
+ +---------------+ / | other fields |
+ | ni_tx_rings | / +===============+
+ | ni_rx_rings | / | buf_idx, len | slot[0]
+ | | / | flags, ptr |
+ | | / +---------------+
+ +===============+ / | buf_idx, len | slot[1]
+ | txring_ofs[0] | (rel.to nifp)--' | flags, ptr |
+ | txring_ofs[1] | +---------------+
+ (tx+1 entries) (num_slots entries)
+ | txring_ofs[t] | | buf_idx, len | slot[n-1]
+ +---------------+ | flags, ptr |
+ | rxring_ofs[0] | +---------------+
| rxring_ofs[1] |
- (num_rings+1 entries)
- | txring_ofs[n] |
+ (rx+1 entries)
+ | rxring_ofs[r] |
+---------------+
- * The private descriptor ('softc' or 'adapter') of each interface
- * is extended with a "struct netmap_adapter" containing netmap-related
- * info (see description in dev/netmap/netmap_kernel.h.
- * Among other things, tx_rings and rx_rings point to the arrays of
- * "struct netmap_kring" which in turn reache the various
- * "struct netmap_ring", shared with userspace.
-
- * The NETMAP_RING is the userspace-visible replica of the NIC ring.
- * Each slot has the index of a buffer, its length and some flags.
+ * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
+ * a file descriptor, the mmap()ed region contains a (logically readonly)
+ * struct netmap_if pointing to struct netmap_ring's.
+ *
+ * There is one netmap_ring per physical NIC ring, plus one tx/rx ring
+ * pair attached to the host stack (this pair is unused for non-NIC ports).
+ *
+ * All physical/host stack ports share the same memory region,
+ * so that zero-copy can be implemented between them.
+ * VALE switch ports instead have separate memory regions.
+ *
+ * The netmap_ring is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer (MTU-sized and residing in the
+ * mmapped region), its length and some flags. An extra 64-bit pointer
+ * is provided for user-supplied buffers in the tx path.
+ *
* In user space, the buffer address is computed as
- * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
- * In the kernel, buffers do not necessarily need to be contiguous,
- * and the virtual and physical addresses are derived through
- * a lookup table.
+ * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
*
- * struct netmap_slot:
+ * Added in NETMAP_API 11:
*
- * buf_idx is the index of the buffer associated to the slot.
- * len is the length of the payload
- * NS_BUF_CHANGED must be set whenever userspace wants
- * to change buf_idx (it might be necessary to
- * reprogram the NIC slot)
- * NS_REPORT must be set if we want the NIC to generate an interrupt
- * when this slot is used. Leaving it to 0 improves
- * performance.
- * NS_FORWARD if set on a receive ring, and the device is in
- * transparent mode, buffers released with the flag set
- * will be forwarded to the 'other' side (host stack
- * or NIC, respectively) on the next select() or ioctl()
- * NS_NO_LEARN on a VALE switch, do not 'learn' the source port for
- * this packet.
- * NS_PORT_MASK the high 8 bits of the flag, if not zero, indicate the
- * destination port for the VALE switch, overriding
- * the lookup table.
+ * + NIOCREGIF can request the allocation of extra spare buffers from
+ * the same memory pool. The desired number of buffers must be in
+ * nr_arg3. The ioctl may return fewer buffers, depending on memory
+ * availability. nr_arg3 will return the actual value, and, once
+ * mapped, nifp->ni_bufs_head will be the index of the first buffer.
+ *
+ * The buffers are linked to each other using the first uint32_t
+ * as the index. On close, ni_bufs_head must point to the list of
+ * buffers to be released.
+ *
+ * + NIOCREGIF can request space for extra rings (and buffers)
+ * allocated in the same memory space. The number of extra rings
+ * is in nr_arg1, and is advisory. This is a no-op on NICs where
+ * the size of the memory space is fixed.
+ *
+ * + NIOCREGIF can attach to PIPE rings sharing the same memory
+ * space with a parent device. The ifname indicates the parent device,
+ * which must already exist. Flags in nr_flags indicate if we want to
+ * bind the master or slave side, the index (from nr_ringid)
+ * is just a cookie and does not need to be sequential.
+ *
+ * + NIOCREGIF can also attach to 'monitor' rings that replicate
+ * the content of specific rings, also from the same memory space.
+ *
+ * Extra flags in nr_flags support the above functions.
+ * Application libraries may use the following naming scheme:
+ * netmap:foo all NIC ring pairs
+ * netmap:foo^ only host ring pair
+ * netmap:foo+ all NIC ring + host ring pairs
+ * netmap:foo-k the k-th NIC ring pair
+ * netmap:foo{k PIPE ring pair k, master side
+ * netmap:foo}k PIPE ring pair k, slave side
*/
+/*
+ * struct netmap_slot is a buffer descriptor
+ */
struct netmap_slot {
- uint32_t buf_idx; /* buffer index */
- uint16_t len; /* packet length, to be copied to/from the hw ring */
- uint16_t flags; /* buf changed, etc. */
-#define NS_BUF_CHANGED 0x0001 /* must resync the map, buffer changed */
-#define NS_REPORT 0x0002 /* ask the hardware to report results
- * e.g. by generating an interrupt
- */
-#define NS_FORWARD 0x0004 /* pass packet to the other endpoint
- * (host stack or device)
- */
-#define NS_NO_LEARN 0x0008
+ uint32_t buf_idx; /* buffer index */
+ uint16_t len; /* length for this slot */
+ uint16_t flags; /* buf changed, etc. */
+ uint64_t ptr; /* pointer for indirect buffers */
+};
+
+/*
+ * The following flags control how the slot is used
+ */
+
+#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */
+ /*
+ * must be set whenever buf_idx is changed (as it might be
+ * necessary to recompute the physical address and mapping)
+ */
+
+#define NS_REPORT 0x0002 /* ask the hardware to report results */
+ /*
+ * Request notification when slot is used by the hardware.
+ * Normally transmit completions are handled lazily and
+ * may be unreported. This flag lets us know when a slot
+ * has been sent (e.g. to terminate the sender).
+ */
+
+#define NS_FORWARD 0x0004 /* pass packet 'forward' */
+ /*
+ * (Only for physical ports, rx rings with NR_FORWARD set).
+ * Slot released to the kernel (i.e. before ring->head) with
+ * this flag set are passed to the peer ring (host/NIC),
+ * thus restoring the host-NIC connection for these slots.
+ * This supports efficient traffic monitoring or firewalling.
+ */
+
+#define NS_NO_LEARN 0x0008 /* disable bridge learning */
+ /*
+ * On a VALE switch, do not 'learn' the source port for
+ * this buffer.
+ */
+
+#define NS_INDIRECT 0x0010 /* userspace buffer */
+ /*
+ * (VALE tx rings only) data is in a userspace buffer,
+ * whose address is in the 'ptr' field in the slot.
+ */
+
+#define NS_MOREFRAG 0x0020 /* packet has more fragments */
+ /*
+ * (VALE ports only)
+ * Set on all but the last slot of a multi-segment packet.
+ * The 'len' field refers to the individual fragment.
+ */
+
#define NS_PORT_SHIFT 8
#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
-};
+ /*
+ * The high 8 bits of the flag, if not zero, indicate the
+ * destination port for the VALE switch, overriding
+ * the lookup table.
+ */
+#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff)
+ /*
+ * (VALE rx rings only) the high 8 bits
+ * are the number of fragments.
+ */
+
+
/*
+ * struct netmap_ring
+ *
* Netmap representation of a TX or RX ring (also known as "queue").
* This is a queue implemented as a fixed-size circular array.
- * At the software level, two fields are important: avail and cur.
+ * At the software level the important fields are: head, cur, tail.
*
* In TX rings:
- * avail indicates the number of slots available for transmission.
- * It is updated by the kernel after every netmap system call.
- * It MUST BE decremented by the application when it appends a
- * packet.
- * cur indicates the slot to use for the next packet
- * to send (i.e. the "tail" of the queue).
- * It MUST BE incremented by the application before
- * netmap system calls to reflect the number of newly
- * sent packets.
- * It is checked by the kernel on netmap system calls
- * (normally unmodified by the kernel unless invalid).
*
- * The kernel side of netmap uses two additional fields in its own
- * private ring structure, netmap_kring:
- * nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
- * nr_hwavail is the number of slots known as available by the
- * hardware. It is updated on an INTR (inc by the
- * number of packets sent) and on a NIOCTXSYNC
- * (decrease by nr_cur - nr_hwcur)
- * A special case, nr_hwavail is -1 if the transmit
- * side is idle (no pending transmits).
+ * head first slot available for transmission.
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
*
+ * [head .. tail-1] can be used for new packets to send;
+ * 'head' and 'cur' must be incremented as slots are filled
+ * with new packets to be sent;
+ * 'cur' can be moved further ahead if we need more space
+ * for new transmissions. XXX todo (2014-03-12)
+ *
* In RX rings:
- * avail is the number of packets available (possibly 0).
- * It MUST BE decremented by the application when it consumes
- * a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
- * cur indicates the first slot that contains a packet not
- * processed yet (the "head" of the queue).
- * It MUST BE incremented by the software when it consumes
- * a packet.
- * reserved indicates the number of buffers before 'cur'
- * that the application has still in use. Normally 0,
- * it MUST BE incremented by the application when it
- * does not return the buffer immediately, and decremented
- * when the buffer is finally freed.
*
- * The kernel side of netmap uses two additional fields in the kring:
- * nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
- * nr_hwavail is the number of packets available. It is updated
- * on INTR (inc by the number of new packets arrived)
- * and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
+ * head first valid received packet
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
*
+ * [head .. tail-1] contain received packets;
+ * 'head' and 'cur' must be incremented as slots are consumed
+ * and can be returned to the kernel;
+ * 'cur' can be moved further ahead if we want to wait for
+ * new packets without returning the previous ones.
+ *
* DATA OWNERSHIP/LOCKING:
- * The netmap_ring is owned by the user program and it is only
- * accessed or modified in the upper half of the kernel during
- * a system call.
+ * The netmap_ring, and all slots and buffers in the range
+ * [head .. tail-1] are owned by the user program;
+ * the kernel only accesses them during a netmap system call
+ * and in the user thread context.
*
- * The netmap_kring is only modified by the upper half of the kernel.
- *
- * FLAGS
- * NR_TIMESTAMP updates the 'ts' field on each syscall. This is
- * a global timestamp for all packets.
- * NR_RX_TSTMP if set, the last 64 byte in each buffer will
- * contain a timestamp for the frame supplied by
- * the hardware (if supported)
- * NR_FORWARD if set, the NS_FORWARD flag in each slot of the
- * RX ring is checked, and if set the packet is
- * passed to the other side (host stack or device,
- * respectively). This permits bpf-like behaviour
- * or transparency for selected packets.
+ * Other slots and buffers are reserved for use by the kernel
*/
struct netmap_ring {
/*
- * nr_buf_base_ofs is meant to be used through macros.
+ * buf_ofs is meant to be used through macros.
* It contains the offset of the buffer region from this
* descriptor.
*/
- const ssize_t buf_ofs;
+ const int64_t buf_ofs;
const uint32_t num_slots; /* number of slots in the ring. */
- uint32_t avail; /* number of usable slots */
- uint32_t cur; /* 'current' r/w position */
- uint32_t reserved; /* not refilled before current */
+ const uint32_t nr_buf_size;
+ const uint16_t ringid;
+ const uint16_t dir; /* 0: tx, 1: rx */
- const uint16_t nr_buf_size;
- uint16_t flags;
-#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
-#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */
-#define NR_RX_TSTMP 0x0008 /* set rx timestamp in slots */
+ uint32_t head; /* (u) first user slot */
+ uint32_t cur; /* (u) wakeup point */
+ uint32_t tail; /* (k) first kernel slot */
- struct timeval ts; /* time of last *sync() */
+ uint32_t flags;
+ struct timeval ts; /* (k) time of last *sync() */
+
+ /* opaque room for a mutex or similar object */
+ uint8_t sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN)));
+
/* the slots follow. This struct has variable size */
struct netmap_slot slot[0]; /* array of slots. */
};
@@ -245,53 +281,191 @@
/*
+ * RING FLAGS
+ */
+#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
+ /*
+ * updates the 'ts' field on each netmap syscall. This saves
+ * saves a separate gettimeofday(), and is not much worse than
+ * software timestamps generated in the interrupt handler.
+ */
+
+#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */
+ /*
+ * Enables the NS_FORWARD slot flag for the ring.
+ */
+
+
+/*
* Netmap representation of an interface and its queue(s).
+ * This is initialized by the kernel when binding a file
+ * descriptor to a port, and should be considered as readonly
+ * by user programs. The kernel never uses it.
+ *
* There is one netmap_if for each file descriptor on which we want
- * to select/poll. We assume that on each interface has the same number
- * of receive and transmit queues.
+ * to select/poll.
* select/poll operates on one or all pairs depending on the value of
* nmr_queueid passed on the ioctl.
*/
struct netmap_if {
char ni_name[IFNAMSIZ]; /* name of the interface. */
- const u_int ni_version; /* API version, currently unused */
- const u_int ni_rx_rings; /* number of rx rings */
- const u_int ni_tx_rings; /* if zero, same as ni_rx_rings */
+ const uint32_t ni_version; /* API version, currently unused */
+ const uint32_t ni_flags; /* properties */
+#define NI_PRIV_MEM 0x1 /* private memory region */
+
/*
+ * The number of packet rings available in netmap mode.
+ * Physical NICs can have different numbers of tx and rx rings.
+ * Physical NICs also have a 'host' ring pair.
+ * Additionally, clients can request additional ring pairs to
+ * be used for internal communication.
+ */
+ const uint32_t ni_tx_rings; /* number of HW tx rings */
+ const uint32_t ni_rx_rings; /* number of HW rx rings */
+
+ uint32_t ni_bufs_head; /* head index for extra bufs */
+ uint32_t ni_spare1[5];
+ /*
* The following array contains the offset of each netmap ring
- * from this structure. The first ni_tx_queues+1 entries refer
- * to the tx rings, the next ni_rx_queues+1 refer to the rx rings
- * (the last entry in each block refers to the host stack rings).
- * The area is filled up by the kernel on NIOCREG,
+ * from this structure, in the following order:
+ * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
+ * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
+ *
+ * The area is filled up by the kernel on NIOCREGIF,
* and then only read by userspace code.
*/
const ssize_t ring_ofs[0];
};
-#ifndef NIOCREGIF
+
+#ifndef NIOCREGIF
/*
* ioctl names and related fields
*
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ * whose identity is set in NIOCREGIF through nr_ringid.
+ * These are non blocking and take no argument.
+ *
* NIOCGINFO takes a struct ifreq, the interface name is the input,
* the outputs are number of queues and number of descriptor
* for each queue (useful to set number of threads etc.).
+ * The info returned is only advisory and may change before
+ * the interface is bound to a file descriptor.
*
- * NIOCREGIF takes an interface name within a struct ifreq,
+ * NIOCREGIF takes an interface name within a struct nmreq,
* and activates netmap mode on the interface (if possible).
*
- * NIOCUNREGIF unregisters the interface associated to the fd.
+ * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
+ * can pass it down to other NIC-related ioctls.
*
- * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
- * whose identity is set in NIOCREGIF through nr_ringid
+ * The actual argument (struct nmreq) has a number of options to request
+ * different functions.
+ * The following are used in NIOCREGIF when nr_cmd == 0:
+ *
+ * nr_name (in)
+ * The name of the port (em0, valeXXX:YYY, etc.)
+ * limited to IFNAMSIZ for backward compatibility.
+ *
+ * nr_version (in/out)
+ * Must match NETMAP_API as used in the kernel, error otherwise.
+ * Always returns the desired value on output.
+ *
+ * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
+ * On input, non-zero values may be used to reconfigure the port
+ * according to the requested values, but this is not guaranteed.
+ * On output the actual values in use are reported.
+ *
+ * nr_ringid (in)
+ * Indicates how rings should be bound to the file descriptors.
+ * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
+ * are used to indicate the ring number, and nr_flags specifies
+ * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ *
+ * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
+ * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
+ * the binding as follows:
+ * 0 (default) binds all physical rings
+ * NETMAP_HW_RING | ring number binds a single ring pair
+ * NETMAP_SW_RING binds only the host tx/rx rings
+ *
+ * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push
+ * packets on tx rings only if POLLOUT is set.
+ * The default is to push any pending packet.
+ *
+ * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
+ * packets on rx rings also when POLLIN is NOT set.
+ * The default is to touch the rx ring only with POLLIN.
+ * Note that this is the opposite of TX because it
+ * reflects the common usage.
+ *
+ * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
+ * NETMAP_PRIV_MEM is set on return for ports that do not use
+ * the global memory allocator.
+ * This information is not significant and applications
+ * should look at the region id in nr_arg2
+ *
+ * nr_flags is the recommended mode to indicate which rings should
+ * be bound to a file descriptor. Values are NR_REG_*
+ *
+ * nr_arg1 (in) The number of extra rings to be reserved.
+ * Especially when allocating a VALE port the system only
+ * allocates the amount of memory needed for the port.
+ * If more shared memory rings are desired (e.g. for pipes),
+ * the first invocation for the same basename/allocator
+ * should specify a suitable number. Memory cannot be
+ * extended after the first allocation without closing
+ * all ports on the same region.
+ *
+ * nr_arg2 (in/out) The identity of the memory region used.
+ * On input, 0 means the system decides autonomously,
+ * other values may try to select a specific region.
+ * On return the actual value is reported.
+ * Region '1' is the global allocator, normally shared
+ * by all interfaces. Other values are private regions.
+ * If two ports use the same region zero-copy is possible.
+ *
+ * nr_arg3 (in/out) number of extra buffers to be allocated.
+ *
+ *
+ *
+ * nr_cmd (in) if non-zero indicates a special command:
+ * NETMAP_BDG_ATTACH and nr_name = vale*:ifname
+ * attaches the NIC to the switch; nr_ringid specifies
+ * which rings to use. Used by vale-ctl -a ...
+ * nr_arg1 = NETMAP_BDG_HOST also attaches the host port
+ * as in vale-ctl -h ...
+ *
+ * NETMAP_BDG_DETACH and nr_name = vale*:ifname
+ * disconnects a previously attached NIC.
+ * Used by vale-ctl -d ...
+ *
+ * NETMAP_BDG_LIST
+ * list the configuration of VALE switches.
+ *
+ * NETMAP_BDG_VNET_HDR
+ * Set the virtio-net header length used by the client
+ * of a VALE switch port.
+ *
+ * NETMAP_BDG_NEWIF
+ * create a persistent VALE port with name nr_name.
+ * Used by vale-ctl -n ...
+ *
+ * NETMAP_BDG_DELIF
+ * delete a persistent VALE port. Used by vale-ctl -d ...
+ *
+ * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific
+ *
+ *
+ *
*/
+
/*
- * struct nmreq overlays a struct ifreq
+ * struct nmreq overlays a struct ifreq (just the name)
*/
struct nmreq {
char nr_name[IFNAMSIZ];
uint32_t nr_version; /* API version */
-#define NETMAP_API 3 /* current version */
uint32_t nr_offset; /* nifp offset in the shared region */
uint32_t nr_memsize; /* size of the shared region */
uint32_t nr_tx_slots; /* slots in tx rings */
@@ -298,15 +472,50 @@
uint32_t nr_rx_slots; /* slots in rx rings */
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
+
uint16_t nr_ringid; /* ring(s) we care about */
-#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */
-#define NETMAP_SW_RING 0x2000 /* process the sw ring */
+#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */
+#define NETMAP_SW_RING 0x2000 /* only host ring pair */
+
+#define NETMAP_RING_MASK 0x0fff /* the ring number */
+
#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
-#define NETMAP_RING_MASK 0xfff /* the ring number */
- uint16_t spare1;
- uint32_t spare2[4];
+
+#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */
+
+ uint16_t nr_cmd;
+#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
+#define NETMAP_BDG_DETACH 2 /* detach the NIC */
+#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */
+#define NETMAP_BDG_LIST 4 /* get bridge's info */
+#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */
+#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */
+#define NETMAP_BDG_NEWIF 6 /* create a virtual port */
+#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */
+ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
+#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
+
+ uint16_t nr_arg2;
+ uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */
+ uint32_t nr_flags;
+ /* various modes, extends nr_ringid */
+ uint32_t spare2[1];
};
+#define NR_REG_MASK 0xf /* values for nr_flags */
+enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
+ NR_REG_ALL_NIC = 1,
+ NR_REG_SW = 2,
+ NR_REG_NIC_SW = 3,
+ NR_REG_ONE_NIC = 4,
+ NR_REG_PIPE_MASTER = 5,
+ NR_REG_PIPE_SLAVE = 6,
+};
+/* monitor uses the NR_REG to select the rings to monitor */
+#define NR_MONITOR_TX 0x100
+#define NR_MONITOR_RX 0x200
+
+
/*
* FreeBSD uses the size value embedded in the _IOWR to determine
* how much to copy in/out. So we need it to match the actual
@@ -315,9 +524,34 @@
*/
#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
-#define NIOCUNREGIF _IO('i', 147) /* interface unregister */
#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
+#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */
#endif /* !NIOCREGIF */
+
+/*
+ * Helper functions for kernel and userspace
+ */
+
+/*
+ * check if space is available in the ring.
+ */
+static inline int
+nm_ring_empty(struct netmap_ring *ring)
+{
+ return (ring->cur == ring->tail);
+}
+
+/*
+ * Opaque structure that is passed to an external kernel
+ * module via ioctl(fd, NIOCCONFIG, req) for a user-owned
+ * bridge port (at this point ephemeral VALE interface).
+ */
+#define NM_IFRDATA_LEN 256
+struct nm_ifreq {
+ char nifr_name[IFNAMSIZ];
+ char data[NM_IFRDATA_LEN];
+};
+
#endif /* _NET_NETMAP_H_ */
Modified: trunk/sys/net/netmap_user.h
===================================================================
--- trunk/sys/net/netmap_user.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/netmap_user.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,42 +1,35 @@
/* $MidnightBSD$ */
/*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- *
+ * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
- *
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the
- * distribution.
- *
- * 3. Neither the name of the authors nor the names of their contributors
- * may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
/*
- * $FreeBSD: stable/9/sys/net/netmap_user.h 246355 2013-02-05 09:40:31Z luigi $
- * $Id: netmap_user.h 10597 2012-02-21 05:08:32Z luigi $
+ * $FreeBSD: stable/10/sys/net/netmap_user.h 278775 2015-02-14 19:18:56Z luigi $
*
- * This header contains the macros used to manipulate netmap structures
- * and packets in userspace. See netmap(4) for more information.
+ * Functions and macros to manipulate netmap structures and packets
+ * in userspace. See netmap(4) for more information.
*
* The address of the struct netmap_if, say nifp, is computed from the
* value returned from ioctl(.., NIOCREG, ...) and the mmap region:
@@ -48,25 +41,47 @@
* From there:
* struct netmap_ring *NETMAP_TXRING(nifp, index)
* struct netmap_ring *NETMAP_RXRING(nifp, index)
- * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
+ * we can access ring->cur, ring->head, ring->tail, etc.
*
* ring->slot[i] gives us the i-th slot (we can access
- * directly plen, flags, bufindex)
+ * directly len, flags, buf_idx)
*
- * char *buf = NETMAP_BUF(ring, index) returns a pointer to
- * the i-th buffer
+ * char *buf = NETMAP_BUF(ring, x) returns a pointer to
+ * the buffer numbered x
*
- * Since rings are circular, we have macros to compute the next index
- * i = NETMAP_RING_NEXT(ring, i);
+ * All ring indexes (head, cur, tail) should always move forward.
+ * To compute the next index in a circular ring you can use
+ * i = nm_ring_next(ring, i);
+ *
+ * To ease porting apps from pcap to netmap we supply a few functions
+ * that can be called to open, close, read and write on netmap in a way
+ * similar to libpcap. Note that the read/write functions depend on
+ * an ioctl()/select()/poll() being issued to refill rings or push
+ * packets out.
+ *
+ * In order to use these, include #define NETMAP_WITH_LIBS
+ * in the source file that invokes these functions.
*/
#ifndef _NET_NETMAP_USER_H_
#define _NET_NETMAP_USER_H_
+#include <stdint.h>
+#include <sys/socket.h> /* apple needs sockaddr */
+#include <net/if.h> /* IFNAMSIZ */
+
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif /* likely and unlikely */
+
+#include <net/netmap.h>
+
+/* helper macro */
#define _NETMAP_OFFSET(type, ptr, offset) \
((type)(void *)((char *)(ptr) + (offset)))
-#define NETMAP_IF(b, o) _NETMAP_OFFSET(struct netmap_if *, b, o)
+#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs)
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
nifp, (nifp)->ring_ofs[index] )
@@ -79,19 +94,589 @@
#define NETMAP_BUF_IDX(ring, buf) \
( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
- (ring)->nr_buf_size )
+ (ring)->nr_buf_size )
-#define NETMAP_RING_NEXT(r, i) \
- ((i)+1 == (r)->num_slots ? 0 : (i) + 1 )
-#define NETMAP_RING_FIRST_RESERVED(r) \
- ( (r)->cur < (r)->reserved ? \
- (r)->cur + (r)->num_slots - (r)->reserved : \
- (r)->cur - (r)->reserved )
+static inline uint32_t
+nm_ring_next(struct netmap_ring *r, uint32_t i)
+{
+ return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
+}
+
/*
- * Return 1 if the given tx ring is empty.
+ * Return 1 if we have pending transmissions in the tx ring.
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
*/
-#define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1)
+static inline int
+nm_tx_pending(struct netmap_ring *r)
+{
+ return nm_ring_next(r, r->tail) != r->head;
+}
+
+static inline uint32_t
+nm_ring_space(struct netmap_ring *ring)
+{
+ int ret = ring->tail - ring->cur;
+ if (ret < 0)
+ ret += ring->num_slots;
+ return ret;
+}
+
+
+#ifdef NETMAP_WITH_LIBS
+/*
+ * Support for simple I/O libraries.
+ * Include other system headers required for compiling this.
+ */
+
+#ifndef HAVE_NETMAP_WITH_LIBS
+#define HAVE_NETMAP_WITH_LIBS
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <string.h> /* memset */
+#include <sys/ioctl.h>
+#include <sys/errno.h> /* EINVAL */
+#include <fcntl.h> /* O_RDWR */
+#include <unistd.h> /* close() */
+#include <signal.h>
+#include <stdlib.h>
+
+#ifndef ND /* debug macros */
+/* debug support */
+#define ND(_fmt, ...) do {} while(0)
+#define D(_fmt, ...) \
+ do { \
+ struct timeval _t0; \
+ gettimeofday(&_t0, NULL); \
+ fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \
+ (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ } while (0)
+
+/* Rate limited version of "D", lps indicates how many per second */
+#define RD(lps, format, ...) \
+ do { \
+ static int __t0, __cnt; \
+ struct timeval __xxts; \
+ gettimeofday(&__xxts, NULL); \
+ if (__t0 != __xxts.tv_sec) { \
+ __t0 = __xxts.tv_sec; \
+ __cnt = 0; \
+ } \
+ if (__cnt++ < lps) { \
+ D(format, ##__VA_ARGS__); \
+ } \
+ } while (0)
+#endif
+
+struct nm_pkthdr { /* same as pcap_pkthdr */
+ struct timeval ts;
+ uint32_t caplen;
+ uint32_t len;
+};
+
+struct nm_stat { /* same as pcap_stat */
+ u_int ps_recv;
+ u_int ps_drop;
+ u_int ps_ifdrop;
+#ifdef WIN32
+ u_int bs_capt;
+#endif /* WIN32 */
+};
+
+#define NM_ERRBUF_SIZE 512
+
+struct nm_desc {
+ struct nm_desc *self; /* point to self if netmap. */
+ int fd;
+ void *mem;
+ uint32_t memsize;
+ int done_mmap; /* set if mem is the result of mmap */
+ struct netmap_if * const nifp;
+ uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
+ uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
+ struct nmreq req; /* also contains the nr_name = ifname */
+ struct nm_pkthdr hdr;
+
+ /*
+ * The memory contains netmap_if, rings and then buffers.
+ * Given a pointer (e.g. to nm_inject) we can compare with
+ * mem/buf_start/buf_end to tell if it is a buffer or
+ * some other descriptor in our region.
+ * We also store a pointer to some ring as it helps in the
+ * translation from buffer indexes to addresses.
+ */
+ struct netmap_ring * const some_ring;
+ void * const buf_start;
+ void * const buf_end;
+ /* parameters from pcap_open_live */
+ int snaplen;
+ int promisc;
+ int to_ms;
+ char *errbuf;
+
+ /* save flags so we can restore them on close */
+ uint32_t if_flags;
+ uint32_t if_reqcap;
+ uint32_t if_curcap;
+
+ struct nm_stat st;
+ char msg[NM_ERRBUF_SIZE];
+};
+
+/*
+ * when the descriptor is open correctly, d->self == d
+ * Eventually we should also use some magic number.
+ */
+#define P2NMD(p) ((struct nm_desc *)(p))
+#define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d))
+#define NETMAP_FD(d) (P2NMD(d)->fd)
+
+
+/*
+ * this is a slightly optimized copy routine which rounds
+ * to multiple of 64 bytes and is often faster than dealing
+ * with other odd sizes. We assume there is enough room
+ * in the source and destination buffers.
+ *
+ * XXX only for multiples of 64 bytes, non overlapped.
+ */
+static inline void
+nm_pkt_copy(const void *_src, void *_dst, int l)
+{
+ const uint64_t *src = (const uint64_t *)_src;
+ uint64_t *dst = (uint64_t *)_dst;
+
+ if (unlikely(l >= 1024)) {
+ memcpy(dst, src, l);
+ return;
+ }
+ for (; likely(l > 0); l-=64) {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ }
+}
+
+
+/*
+ * The callback, invoked on each received packet. Same as libpcap
+ */
+typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
+
+/*
+ *--- the pcap-like API ---
+ *
+ * nm_open() opens a file descriptor, binds to a port and maps memory.
+ *
+ * ifname (netmap:foo or vale:foo) is the port name
+ * a suffix can indicate the following:
+ * ^ bind the host (sw) ring pair
+ * * bind host and NIC ring pairs (transparent)
+ * -NN bind individual NIC ring pair
+ * {NN bind master side of pipe NN
+ * }NN bind slave side of pipe NN
+ *
+ * req provides the initial values of nmreq before parsing ifname.
+ * Remember that the ifname parsing will override the ring
+ * number in nm_ringid, and part of nm_flags;
+ * flags special functions, normally 0
+ * indicates which fields of *arg are significant
+ * arg special functions, normally NULL
+ * if passed a netmap_desc with mem != NULL,
+ * use that memory instead of mmap.
+ */
+
+static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
+ uint64_t flags, const struct nm_desc *arg);
+
+/*
+ * nm_open can import some fields from the parent descriptor.
+ * These flags control which ones.
+ * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
+ * which set the initial value for these flags.
+ * Note that the 16 low bits of the flags are reserved for data
+ * that may go into the nmreq.
+ */
+enum {
+ NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */
+ NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */
+ NM_OPEN_ARG1 = 0x100000,
+ NM_OPEN_ARG2 = 0x200000,
+ NM_OPEN_ARG3 = 0x400000,
+ NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */
+};
+
+
+/*
+ * nm_close() closes and restores the port to its previous state
+ */
+
+static int nm_close(struct nm_desc *);
+
+/*
+ * nm_inject() is the same as pcap_inject()
+ * nm_dispatch() is the same as pcap_dispatch()
+ * nm_nextpkt() is the same as pcap_next()
+ */
+
+static int nm_inject(struct nm_desc *, const void *, size_t);
+static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
+static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
+
+
+/*
+ * Try to open, return descriptor if successful, NULL otherwise.
+ * An invalid netmap name will return errno = 0;
+ * You can pass a pointer to a pre-filled nm_desc to add special
+ * parameters. Flags is used as follows
+ * NM_OPEN_NO_MMAP use the memory from arg, only
+ * if the nr_arg2 (memory block) matches.
+ * NM_OPEN_ARG1 use req.nr_arg1 from arg
+ * NM_OPEN_ARG2 use req.nr_arg2 from arg
+ * NM_OPEN_RING_CFG user ring config from arg
+ */
+static struct nm_desc *
+nm_open(const char *ifname, const struct nmreq *req,
+ uint64_t new_flags, const struct nm_desc *arg)
+{
+ struct nm_desc *d = NULL;
+ const struct nm_desc *parent = arg;
+ u_int namelen;
+ uint32_t nr_ringid = 0, nr_flags;
+ const char *port = NULL;
+ const char *errmsg = NULL;
+
+ if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
+ errno = 0; /* name not recognised, not an error */
+ return NULL;
+ }
+ if (ifname[0] == 'n')
+ ifname += 7;
+ /* scan for a separator */
+ for (port = ifname; *port && !index("-*^{}", *port); port++)
+ ;
+ namelen = port - ifname;
+ if (namelen >= sizeof(d->req.nr_name)) {
+ errmsg = "name too long";
+ goto fail;
+ }
+ switch (*port) {
+ default: /* '\0', no suffix */
+ nr_flags = NR_REG_ALL_NIC;
+ break;
+ case '-': /* one NIC */
+ nr_flags = NR_REG_ONE_NIC;
+ nr_ringid = atoi(port + 1);
+ break;
+ case '*': /* NIC and SW, ignore port */
+ nr_flags = NR_REG_NIC_SW;
+ if (port[1]) {
+ errmsg = "invalid port for nic+sw";
+ goto fail;
+ }
+ break;
+ case '^': /* only sw ring */
+ nr_flags = NR_REG_SW;
+ if (port[1]) {
+ errmsg = "invalid port for sw ring";
+ goto fail;
+ }
+ break;
+ case '{':
+ nr_flags = NR_REG_PIPE_MASTER;
+ nr_ringid = atoi(port + 1);
+ break;
+ case '}':
+ nr_flags = NR_REG_PIPE_SLAVE;
+ nr_ringid = atoi(port + 1);
+ break;
+ }
+
+ if (nr_ringid >= NETMAP_RING_MASK) {
+ errmsg = "invalid ringid";
+ goto fail;
+ }
+
+ d = (struct nm_desc *)calloc(1, sizeof(*d));
+ if (d == NULL) {
+ errmsg = "nm_desc alloc failure";
+ errno = ENOMEM;
+ return NULL;
+ }
+ d->self = d; /* set this early so nm_close() works */
+ d->fd = open("/dev/netmap", O_RDWR);
+ if (d->fd < 0) {
+ errmsg = "cannot open /dev/netmap";
+ goto fail;
+ }
+
+ if (req)
+ d->req = *req;
+ d->req.nr_version = NETMAP_API;
+ d->req.nr_ringid &= ~NETMAP_RING_MASK;
+
+ /* these fields are overridden by ifname and flags processing */
+ d->req.nr_ringid |= nr_ringid;
+ d->req.nr_flags = nr_flags;
+ memcpy(d->req.nr_name, ifname, namelen);
+ d->req.nr_name[namelen] = '\0';
+ /* optionally import info from parent */
+ if (IS_NETMAP_DESC(parent) && new_flags) {
+ if (new_flags & NM_OPEN_ARG1)
+ D("overriding ARG1 %d", parent->req.nr_arg1);
+ d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
+ parent->req.nr_arg1 : 4;
+ if (new_flags & NM_OPEN_ARG2)
+ D("overriding ARG2 %d", parent->req.nr_arg2);
+ d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
+ parent->req.nr_arg2 : 0;
+ if (new_flags & NM_OPEN_ARG3)
+ D("overriding ARG3 %d", parent->req.nr_arg3);
+ d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
+ parent->req.nr_arg3 : 0;
+ if (new_flags & NM_OPEN_RING_CFG) {
+ D("overriding RING_CFG");
+ d->req.nr_tx_slots = parent->req.nr_tx_slots;
+ d->req.nr_rx_slots = parent->req.nr_rx_slots;
+ d->req.nr_tx_rings = parent->req.nr_tx_rings;
+ d->req.nr_rx_rings = parent->req.nr_rx_rings;
+ }
+ if (new_flags & NM_OPEN_IFNAME) {
+ D("overriding ifname %s ringid 0x%x flags 0x%x",
+ parent->req.nr_name, parent->req.nr_ringid,
+ parent->req.nr_flags);
+ memcpy(d->req.nr_name, parent->req.nr_name,
+ sizeof(d->req.nr_name));
+ d->req.nr_ringid = parent->req.nr_ringid;
+ d->req.nr_flags = parent->req.nr_flags;
+ }
+ }
+ /* add the *XPOLL flags */
+ d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
+
+ if (ioctl(d->fd, NIOCREGIF, &d->req)) {
+ errmsg = "NIOCREGIF failed";
+ goto fail;
+ }
+
+ if (IS_NETMAP_DESC(parent) && parent->mem &&
+ parent->req.nr_arg2 == d->req.nr_arg2) {
+ /* do not mmap, inherit from parent */
+ d->memsize = parent->memsize;
+ d->mem = parent->mem;
+ } else {
+ /* XXX TODO: check if memsize is too large (or there is overflow) */
+ d->memsize = d->req.nr_memsize;
+ d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ d->fd, 0);
+ if (d->mem == MAP_FAILED) {
+ errmsg = "mmap failed";
+ goto fail;
+ }
+ d->done_mmap = 1;
+ }
+ {
+ struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
+ struct netmap_ring *r = NETMAP_RXRING(nifp, );
+
+ *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
+ *(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
+ *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
+ *(void **)(uintptr_t)&d->buf_end =
+ (char *)d->mem + d->memsize;
+ }
+
+ if (d->req.nr_flags == NR_REG_SW) { /* host stack */
+ d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
+ d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
+ } else if (d->req.nr_flags == NR_REG_ALL_NIC) { /* only nic */
+ d->first_tx_ring = 0;
+ d->first_rx_ring = 0;
+ d->last_tx_ring = d->req.nr_tx_rings - 1;
+ d->last_rx_ring = d->req.nr_rx_rings - 1;
+ } else if (d->req.nr_flags == NR_REG_NIC_SW) {
+ d->first_tx_ring = 0;
+ d->first_rx_ring = 0;
+ d->last_tx_ring = d->req.nr_tx_rings;
+ d->last_rx_ring = d->req.nr_rx_rings;
+ } else if (d->req.nr_flags == NR_REG_ONE_NIC) {
+ /* XXX check validity */
+ d->first_tx_ring = d->last_tx_ring =
+ d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK;
+ } else { /* pipes */
+ d->first_tx_ring = d->last_tx_ring = 0;
+ d->first_rx_ring = d->last_rx_ring = 0;
+ }
+
+#ifdef DEBUG_NETMAP_USER
+ { /* debugging code */
+ int i;
+
+ D("%s tx %d .. %d %d rx %d .. %d %d", ifname,
+ d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings,
+ d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings);
+ for (i = 0; i <= d->req.nr_tx_rings; i++) {
+ struct netmap_ring *r = NETMAP_TXRING(d->nifp, i);
+ D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
+ }
+ for (i = 0; i <= d->req.nr_rx_rings; i++) {
+ struct netmap_ring *r = NETMAP_RXRING(d->nifp, i);
+ D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
+ }
+ }
+#endif /* debugging */
+
+ d->cur_tx_ring = d->first_tx_ring;
+ d->cur_rx_ring = d->first_rx_ring;
+ return d;
+
+fail:
+ nm_close(d);
+ if (errmsg)
+ D("%s %s", errmsg, ifname);
+ if (errno == 0)
+ errno = EINVAL;
+ return NULL;
+}
+
+
+static int
+nm_close(struct nm_desc *d)
+{
+ /*
+ * ugly trick to avoid unused warnings
+ */
+ static void *__xxzt[] __attribute__ ((unused)) =
+ { (void *)nm_open, (void *)nm_inject,
+ (void *)nm_dispatch, (void *)nm_nextpkt } ;
+
+ if (d == NULL || d->self != d)
+ return EINVAL;
+ if (d->done_mmap && d->mem)
+ munmap(d->mem, d->memsize);
+ if (d->fd != -1)
+ close(d->fd);
+ bzero(d, sizeof(*d));
+ free(d);
+ return 0;
+}
+
+
+/*
+ * Same prototype as pcap_inject(), only need to cast.
+ */
+static int
+nm_inject(struct nm_desc *d, const void *buf, size_t size)
+{
+ u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
+
+ for (c = 0; c < n ; c++) {
+ /* compute current ring to use */
+ struct netmap_ring *ring;
+ uint32_t i, idx;
+ uint32_t ri = d->cur_tx_ring + c;
+
+ if (ri > d->last_tx_ring)
+ ri = d->first_tx_ring;
+ ring = NETMAP_TXRING(d->nifp, ri);
+ if (nm_ring_empty(ring)) {
+ continue;
+ }
+ i = ring->cur;
+ idx = ring->slot[i].buf_idx;
+ ring->slot[i].len = size;
+ nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
+ d->cur_tx_ring = ri;
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ return size;
+ }
+ return 0; /* fail */
+}
+
+
+/*
+ * Same prototype as pcap_dispatch(), only need to cast.
+ */
+static int
+nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
+{
+ int n = d->last_rx_ring - d->first_rx_ring + 1;
+ int c, got = 0, ri = d->cur_rx_ring;
+
+ if (cnt == 0)
+ cnt = -1;
+ /* cnt == -1 means infinite, but rings have a finite amount
+ * of buffers and the int is large enough that we never wrap,
+ * so we can omit checking for -1
+ */
+ for (c=0; c < n && cnt != got; c++) {
+ /* compute current ring to use */
+ struct netmap_ring *ring;
+
+ ri = d->cur_rx_ring + c;
+ if (ri > d->last_rx_ring)
+ ri = d->first_rx_ring;
+ ring = NETMAP_RXRING(d->nifp, ri);
+ for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
+ u_int i = ring->cur;
+ u_int idx = ring->slot[i].buf_idx;
+ u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
+
+ // __builtin_prefetch(buf);
+ d->hdr.len = d->hdr.caplen = ring->slot[i].len;
+ d->hdr.ts = ring->ts;
+ cb(arg, &d->hdr, buf);
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ }
+ }
+ d->cur_rx_ring = ri;
+ return got;
+}
+
+static u_char *
+nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr)
+{
+ int ri = d->cur_rx_ring;
+
+ do {
+ /* compute current ring to use */
+ struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
+ if (!nm_ring_empty(ring)) {
+ u_int i = ring->cur;
+ u_int idx = ring->slot[i].buf_idx;
+ u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
+
+ // __builtin_prefetch(buf);
+ hdr->ts = ring->ts;
+ hdr->len = hdr->caplen = ring->slot[i].len;
+ ring->cur = nm_ring_next(ring, i);
+ /* we could postpone advancing head if we want
+ * to hold the buffer. This can be supported in
+ * the future.
+ */
+ ring->head = ring->cur;
+ d->cur_rx_ring = ri;
+ return buf;
+ }
+ ri++;
+ if (ri > d->last_rx_ring)
+ ri = d->first_rx_ring;
+ } while (ri != d->cur_rx_ring);
+ return NULL; /* nothing found */
+}
+
+#endif /* !HAVE_NETMAP_WITH_LIBS */
+
+#endif /* NETMAP_WITH_LIBS */
+
#endif /* _NET_NETMAP_USER_H_ */
Added: trunk/sys/net/paravirt.h
===================================================================
--- trunk/sys/net/paravirt.h (rev 0)
+++ trunk/sys/net/paravirt.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,158 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NET_PARAVIRT_H
+#define NET_PARAVIRT_H
+
+ /*
+ * $FreeBSD: stable/10/sys/net/paravirt.h 289385 2015-10-15 20:36:04Z adrian $
+ *
+ Support for virtio-like communication between host (H) and guest (G) NICs.
+
+ THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
+
+ The guest allocates the shared Communication Status Block (csb) and
+ writes its physical address at CSBAL and CSBAH (data is little endian).
+ csb->csb_on enables the mode. If disabled, the device acts as a regular one.
+
+ Notifications for tx and rx are exchanged without vm exits
+ if possible. In particular (only mentioning csb mode below),
+ the following actions are performed. In the description below,
+ "double check" means verifying again the condition that caused
+ the previous action, and reverting the action if the condition has
+ changed. The condition typically depends on a variable set by the
+ other party, and the double check is done to avoid races. E.g.
+
+ // start with A=0
+ again:
+ // do something
+ if ( cond(C) ) { // C is written by the other side
+ A = 1;
+ // barrier
+ if ( !cond(C) ) {
+ A = 0;
+ goto again;
+ }
+ }
+
+ TX: start from idle:
+ H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
+ transmissions, G always updates guest_tdt. If host_need_txkick == 1,
+ G also writes to the TDT, which acts as a kick to H (so pending
+ writes are always dispatched to H as soon as possible.)
+
+ TX: active state:
+ On the kick (TDT write) H sets host_need_txkick == 0 (if not
+ done already by G), and starts an I/O thread trying to consume
+ packets from TDH to guest_tdt, periodically refreshing host_tdh
+ and TDH. When host_tdh == guest_tdt, H sets host_need_txkick=1,
+ and then does the "double check" for race avoidance.
+
+ TX: G runs out of buffers
+ XXX there are two mechanisms, one boolean (using guest_need_txkick)
+ and one with a threshold (using guest_txkick_at). They are mutually
+ exclusive.
+ BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
+ the double check. If H finds guest_need_txkick== 1 on a write
+ to TDH, it also generates an interrupt.
+ THRESHOLD: G sets guest_txkick_at to the TDH value for which it
+ wants to receive an interrupt. When H detects that TDH moves
+ across guest_txkick_at, it generates an interrupt.
+ This second mechanism reduces the number of interrupts and
+ TDT writes on the transmit side when the host is too slow.
+
+ RX: start from idle
+ G starts with guest_need_rxkick = 1 when the receive ring is empty.
+ As packets arrive, H updates host_rdh (and RDH) and also generates an
+ interrupt when guest_need_rxkick == 1 (so incoming packets are
+ always reported to G as soon as possible, apart from interrupt
+ moderation delays). It also tracks guest_rdt for new buffers.
+
+ RX: active state
+ As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
+ draining packets from the receive ring, while updating guest_rdt
+ When G runs out of packets it sets guest_need_rxkick=1 and does the
+ double check.
+
+ RX: H runs out of buffers
+ XXX there are two mechanisms, one boolean (using host_need_rxkick)
+ and one with a threshold (using host_rxkick_at). They are mutually
+ exclusive.
+ BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
+ double check. If G finds host_need_rxkick==1 on updating guest_rdt,
+ it also writes to RDT causing a kick to H.
+ THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
+ to receive a kick. When G detects that guest_rdt moves across
+ host_rxkick_at, it writes to RDT thus generates a kick.
+ This second mechanism reduces the number of kicks and
+ RDT writes on the receive side when the guest is too slow and
+ would free only a few buffers at a time.
+
+ */
+struct paravirt_csb {
+ /* XXX revise the layout to minimize cache bounces.
+ * Usage is described as follows:
+ * [GH][RW][+-0] guest/host reads/writes frequently/rarely/almost never
+ */
+ /* these are (mostly) written by the guest */
+ uint32_t guest_tdt; /* GW+ HR+ pkt to transmit */
+ uint32_t guest_need_txkick; /* GW- HR+ G ran out of tx bufs, request kick */
+ uint32_t guest_need_rxkick; /* GW- HR+ G ran out of rx pkts, request kick */
+ uint32_t guest_csb_on; /* GW- HR+ enable paravirtual mode */
+ uint32_t guest_rdt; /* GW+ HR+ rx buffers available */
+ uint32_t guest_txkick_at; /* GW- HR+ tx ring pos. where G expects an intr */
+ uint32_t guest_use_msix; /* GW0 HR0 guest uses MSI-X interrupts. */
+ uint32_t pad[9];
+
+ /* these are (mostly) written by the host */
+ uint32_t host_tdh; /* GR0 HW- shadow register, mostly unused */
+ uint32_t host_need_txkick; /* GR+ HW- start the iothread */
+ uint32_t host_txcycles_lim; /* GW- HR- how much to spin before sleep.
+ * set by the guest */
+ uint32_t host_txcycles; /* GR0 HW- counter, but no need to be exported */
+ uint32_t host_rdh; /* GR0 HW- shadow register, mostly unused */
+ uint32_t host_need_rxkick; /* GR+ HW- flush rx queued packets */
+ uint32_t host_isr; /* GR* HW* shadow copy of ISR */
+ uint32_t host_rxkick_at; /* GR+ HW- rx ring pos where H expects a kick */
+ uint32_t vnet_ring_high; /* Vnet ring physical address high. */
+ uint32_t vnet_ring_low; /* Vnet ring physical address low. */
+};
+
+#define NET_PARAVIRT_CSB_SIZE 4096
+#define NET_PARAVIRT_NONE (~((uint32_t)0))
+
+#ifdef QEMU_PCI_H
+
+/*
+ * API functions only available within QEMU
+ */
+
+void paravirt_configure_csb(struct paravirt_csb** csb, uint32_t csbbal,
+ uint32_t csbbah, QEMUBH* tx_bh, AddressSpace *as);
+
+#endif /* QEMU_PCI_H */
+
+#endif /* NET_PARAVIRT_H */
Property changes on: trunk/sys/net/paravirt.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/pfil.c
===================================================================
--- trunk/sys/net/pfil.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/pfil.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/pfil.c 198233 2009-10-19 15:19:14Z rwatson $ */
+/* $FreeBSD: stable/10/sys/net/pfil.c 254774 2013-08-24 11:24:15Z andre $ */
/* $NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $ */
/*-
@@ -53,18 +53,18 @@
MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
MTX_DEF);
-static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
+static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
+static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
+static int pfil_chain_remove(pfil_chain_t *, pfil_func_t, void *);
-static int pfil_list_remove(pfil_list_t *,
- int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
- void *);
-
LIST_HEAD(pfilheadhead, pfil_head);
VNET_DEFINE(struct pfilheadhead, pfil_head_list);
#define V_pfil_head_list VNET(pfil_head_list)
+VNET_DEFINE(struct rmlock, pfil_lock);
+#define V_pfil_lock VNET(pfil_lock)
/*
- * pfil_run_hooks() runs the specified packet filter hooks.
+ * pfil_run_hooks() runs the specified packet filter hook chain.
*/
int
pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
@@ -77,8 +77,8 @@
PFIL_RLOCK(ph, &rmpt);
KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
- for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
- pfh = TAILQ_NEXT(pfh, pfil_link)) {
+ for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
+ pfh = TAILQ_NEXT(pfh, pfil_chain)) {
if (pfh->pfil_func != NULL) {
rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
inp);
@@ -91,7 +91,81 @@
return (rv);
}
+static struct packet_filter_hook *
+pfil_chain_get(int dir, struct pfil_head *ph)
+{
+
+ if (dir == PFIL_IN)
+ return (TAILQ_FIRST(&ph->ph_in));
+ else if (dir == PFIL_OUT)
+ return (TAILQ_FIRST(&ph->ph_out));
+ else
+ return (NULL);
+}
+
/*
+ * pfil_try_rlock() acquires rm reader lock for specified head
+ * if this is immediately possible.
+ */
+int
+pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ return (PFIL_TRY_RLOCK(ph, tracker));
+}
+
+/*
+ * pfil_rlock() acquires rm reader lock for specified head.
+ */
+void
+pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ PFIL_RLOCK(ph, tracker);
+}
+
+/*
+ * pfil_runlock() releases reader lock for specified head.
+ */
+void
+pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ PFIL_RUNLOCK(ph, tracker);
+}
+
+/*
+ * pfil_wlock() acquires writer lock for specified head.
+ */
+void
+pfil_wlock(struct pfil_head *ph)
+{
+
+ PFIL_WLOCK(ph);
+}
+
+/*
+ * pfil_wunlock() releases writer lock for specified head.
+ */
+void
+pfil_wunlock(struct pfil_head *ph)
+{
+
+ PFIL_WUNLOCK(ph);
+}
+
+/*
+ * pfil_wowned() returns a non-zero value if the current thread owns
+ * an exclusive lock.
+ */
+int
+pfil_wowned(struct pfil_head *ph)
+{
+
+ return (PFIL_WOWNED(ph));
+}
+
+/*
* pfil_head_register() registers a pfil_head with the packet filter hook
* mechanism.
*/
@@ -100,11 +174,11 @@
{
struct pfil_head *lph;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_FOREACH(lph, &V_pfil_head_list, ph_list) {
if (ph->ph_type == lph->ph_type &&
ph->ph_un.phu_val == lph->ph_un.phu_val) {
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (EEXIST);
}
}
@@ -113,7 +187,7 @@
TAILQ_INIT(&ph->ph_in);
TAILQ_INIT(&ph->ph_out);
LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (0);
}
@@ -127,12 +201,12 @@
{
struct packet_filter_hook *pfh, *pfnext;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_REMOVE(ph, ph_list);
- PFIL_LIST_UNLOCK();
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext)
+ PFIL_HEADLIST_UNLOCK();
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext)
free(pfh, M_IFADDR);
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext)
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext)
free(pfh, M_IFADDR);
PFIL_LOCK_DESTROY(ph);
return (0);
@@ -146,11 +220,11 @@
{
struct pfil_head *ph;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_FOREACH(ph, &V_pfil_head_list, ph_list)
if (ph->ph_type == type && ph->ph_un.phu_val == val)
break;
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (ph);
}
@@ -163,8 +237,7 @@
* PFIL_WAITOK OK to call malloc with M_WAITOK.
*/
int
-pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
{
struct packet_filter_hook *pfh1 = NULL;
struct packet_filter_hook *pfh2 = NULL;
@@ -190,7 +263,7 @@
if (flags & PFIL_IN) {
pfh1->pfil_func = func;
pfh1->pfil_arg = arg;
- err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
+ err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
if (err)
goto locked_error;
ph->ph_nhooks++;
@@ -198,10 +271,10 @@
if (flags & PFIL_OUT) {
pfh2->pfil_func = func;
pfh2->pfil_arg = arg;
- err = pfil_list_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
+ err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
if (err) {
if (flags & PFIL_IN)
- pfil_list_remove(&ph->ph_in, func, arg);
+ pfil_chain_remove(&ph->ph_in, func, arg);
goto locked_error;
}
ph->ph_nhooks++;
@@ -220,22 +293,21 @@
/*
* pfil_remove_hook removes a specific function from the packet filter hook
- * list.
+ * chain.
*/
int
-pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
{
int err = 0;
PFIL_WLOCK(ph);
if (flags & PFIL_IN) {
- err = pfil_list_remove(&ph->ph_in, func, arg);
+ err = pfil_chain_remove(&ph->ph_in, func, arg);
if (err == 0)
ph->ph_nhooks--;
}
if ((err == 0) && (flags & PFIL_OUT)) {
- err = pfil_list_remove(&ph->ph_out, func, arg);
+ err = pfil_chain_remove(&ph->ph_out, func, arg);
if (err == 0)
ph->ph_nhooks--;
}
@@ -243,8 +315,11 @@
return (err);
}
+/*
+ * Internal: Add a new pfil hook into a hook chain.
+ */
static int
-pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
+pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags)
{
struct packet_filter_hook *pfh;
@@ -251,7 +326,7 @@
/*
* First make sure the hook is not already there.
*/
- TAILQ_FOREACH(pfh, list, pfil_link)
+ TAILQ_FOREACH(pfh, chain, pfil_chain)
if (pfh->pfil_func == pfh1->pfil_func &&
pfh->pfil_arg == pfh1->pfil_arg)
return (EEXIST);
@@ -261,26 +336,23 @@
* the same path is followed in or out of the kernel.
*/
if (flags & PFIL_IN)
- TAILQ_INSERT_HEAD(list, pfh1, pfil_link);
+ TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain);
else
- TAILQ_INSERT_TAIL(list, pfh1, pfil_link);
+ TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain);
return (0);
}
/*
- * pfil_list_remove is an internal function that takes a function off the
- * specified list.
+ * Internal: Remove a pfil hook from a hook chain.
*/
static int
-pfil_list_remove(pfil_list_t *list,
- int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
- void *arg)
+pfil_chain_remove(pfil_chain_t *chain, pfil_func_t func, void *arg)
{
struct packet_filter_hook *pfh;
- TAILQ_FOREACH(pfh, list, pfil_link)
+ TAILQ_FOREACH(pfh, chain, pfil_chain)
if (pfh->pfil_func == func && pfh->pfil_arg == arg) {
- TAILQ_REMOVE(list, pfh, pfil_link);
+ TAILQ_REMOVE(chain, pfh, pfil_chain);
free(pfh, M_IFADDR);
return (0);
}
@@ -296,6 +368,7 @@
{
LIST_INIT(&V_pfil_head_list);
+ PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared");
return (0);
}
@@ -306,7 +379,9 @@
vnet_pfil_uninit(const void *unused)
{
- /* XXX should panic if list is not empty */
+ KASSERT(LIST_EMPTY(&V_pfil_head_list),
+ ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list));
+ PFIL_LOCK_DESTROY_REAL(&V_pfil_lock);
return (0);
}
Modified: trunk/sys/net/pfil.h
===================================================================
--- trunk/sys/net/pfil.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/pfil.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/pfil.h 210121 2010-07-15 14:41:06Z luigi $ */
+/* $FreeBSD: stable/10/sys/net/pfil.h 254777 2013-08-24 12:03:24Z andre $ */
/* $NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $ */
/*-
@@ -44,15 +44,18 @@
struct ifnet;
struct inpcb;
+typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
+
/*
* The packet filter hooks are designed for anything to call them to
- * possibly intercept the packet.
+ * possibly intercept the packet. Multiple filter hooks are chained
+ * together and called one after another in the specified order.
*/
struct packet_filter_hook {
- TAILQ_ENTRY(packet_filter_hook) pfil_link;
- int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *);
- void *pfil_arg;
+ TAILQ_ENTRY(packet_filter_hook) pfil_chain;
+ pfil_func_t pfil_func;
+ void *pfil_arg;
};
#define PFIL_IN 0x00000001
@@ -60,63 +63,87 @@
#define PFIL_WAITOK 0x00000004
#define PFIL_ALL (PFIL_IN|PFIL_OUT)
-typedef TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;
+typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t;
#define PFIL_TYPE_AF 1 /* key is AF_* type */
#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */
+#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */
+
+/*
+ * A pfil head is created by each protocol or packet intercept point.
+ * For packet is then run through the hook chain for inspection.
+ */
struct pfil_head {
- pfil_list_t ph_in;
- pfil_list_t ph_out;
- int ph_type;
- int ph_nhooks;
+ pfil_chain_t ph_in;
+ pfil_chain_t ph_out;
+ int ph_type;
+ int ph_nhooks;
#if defined( __linux__ ) || defined( _WIN32 )
- rwlock_t ph_mtx;
+ rwlock_t ph_mtx;
#else
- struct rmlock ph_lock;
+ struct rmlock *ph_plock; /* Pointer to the used lock */
+ struct rmlock ph_lock; /* Private lock storage */
+ int flags;
#endif
union {
- u_long phu_val;
- void *phu_ptr;
+ u_long phu_val;
+ void *phu_ptr;
} ph_un;
-#define ph_af ph_un.phu_val
-#define ph_ifnet ph_un.phu_ptr
+#define ph_af ph_un.phu_val
+#define ph_ifnet ph_un.phu_ptr
LIST_ENTRY(pfil_head) ph_list;
};
-int pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
- int, struct inpcb *), void *, int, struct pfil_head *);
-int pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
- int, struct inpcb *), void *, int, struct pfil_head *);
+/* Public functions for pfil hook management by packet filters. */
+struct pfil_head *pfil_head_get(int, u_long);
+int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
+int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
+#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
+
+/* Public functions to run the packet inspection by protocols. */
int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
int, struct inpcb *inp);
+/* Public functions for pfil head management by protocols. */
int pfil_head_register(struct pfil_head *);
int pfil_head_unregister(struct pfil_head *);
-struct pfil_head *pfil_head_get(int, u_long);
+/* Public pfil locking functions for self managed locks by packet filters. */
+struct rm_priotracker; /* Do not require including rmlock header */
+int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_rlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_runlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_wlock(struct pfil_head *);
+void pfil_wunlock(struct pfil_head *);
+int pfil_wowned(struct pfil_head *ph);
-#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
-#define PFIL_LOCK_INIT(p) \
- rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE)
-#define PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock)
-#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t))
-#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock)
-#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t))
-#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock)
-#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
+/* Internal pfil locking functions. */
+#define PFIL_LOCK_INIT_REAL(l, t) \
+ rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE)
+#define PFIL_LOCK_DESTROY_REAL(l) \
+ rm_destroy(l)
+#define PFIL_LOCK_INIT(p) do { \
+ if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \
+ PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \
+ (p)->ph_plock = &(p)->ph_lock; \
+ } else \
+ (p)->ph_plock = &V_pfil_lock; \
+} while (0)
+#define PFIL_LOCK_DESTROY(p) do { \
+ if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \
+ PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \
+} while (0)
-static __inline struct packet_filter_hook *
-pfil_hook_get(int dir, struct pfil_head *ph)
-{
+#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t))
+#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t))
+#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock)
+#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t))
+#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock)
+#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock)
- if (dir == PFIL_IN)
- return (TAILQ_FIRST(&ph->ph_in));
- else if (dir == PFIL_OUT)
- return (TAILQ_FIRST(&ph->ph_out));
- else
- return (NULL);
-}
+/* Internal locking macros for global/vnet pfil_head_list. */
+#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock)
+#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock)
#endif /* _NET_PFIL_H_ */
Modified: trunk/sys/net/pfkeyv2.h
===================================================================
--- trunk/sys/net/pfkeyv2.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/pfkeyv2.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/pfkeyv2.h 194062 2009-06-12 15:44:35Z vanhu $ */
+/* $FreeBSD: stable/10/sys/net/pfkeyv2.h 194062 2009-06-12 15:44:35Z vanhu $ */
/* $KAME: pfkeyv2.h,v 1.37 2003/09/06 05:15:43 itojun Exp $ */
/*-
Added: trunk/sys/net/pfvar.h
===================================================================
--- trunk/sys/net/pfvar.h (rev 0)
+++ trunk/sys/net/pfvar.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,1752 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ * $FreeBSD: stable/10/sys/net/pfvar.h 332494 2018-04-13 22:33:18Z kp $
+ */
+
+#ifndef _NET_PFVAR_H_
+#define _NET_PFVAR_H_
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/counter.h>
+#include <sys/refcount.h>
+#include <sys/tree.h>
+
+#include <net/radix.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+
+struct pf_addr {
+ union {
+ struct in_addr v4;
+ struct in6_addr v6;
+ u_int8_t addr8[16];
+ u_int16_t addr16[8];
+ u_int32_t addr32[4];
+ } pfa; /* 128-bit address */
+#define v4 pfa.v4
+#define v6 pfa.v6
+#define addr8 pfa.addr8
+#define addr16 pfa.addr16
+#define addr32 pfa.addr32
+};
+
+#define PFI_AFLAG_NETWORK 0x01
+#define PFI_AFLAG_BROADCAST 0x02
+#define PFI_AFLAG_PEER 0x04
+#define PFI_AFLAG_MODEMASK 0x07
+#define PFI_AFLAG_NOALIAS 0x08
+
+struct pf_addr_wrap {
+ union {
+ struct {
+ struct pf_addr addr;
+ struct pf_addr mask;
+ } a;
+ char ifname[IFNAMSIZ];
+ char tblname[PF_TABLE_NAME_SIZE];
+ } v;
+ union {
+ struct pfi_dynaddr *dyn;
+ struct pfr_ktable *tbl;
+ int dyncnt;
+ int tblcnt;
+ } p;
+ u_int8_t type; /* PF_ADDR_* */
+ u_int8_t iflags; /* PFI_AFLAG_* */
+};
+
+#ifdef _KERNEL
+
+struct pfi_dynaddr {
+ TAILQ_ENTRY(pfi_dynaddr) entry;
+ struct pf_addr pfid_addr4;
+ struct pf_addr pfid_mask4;
+ struct pf_addr pfid_addr6;
+ struct pf_addr pfid_mask6;
+ struct pfr_ktable *pfid_kt;
+ struct pfi_kif *pfid_kif;
+ int pfid_net; /* mask or 128 */
+ int pfid_acnt4; /* address count IPv4 */
+ int pfid_acnt6; /* address count IPv6 */
+ sa_family_t pfid_af; /* rule af */
+ u_int8_t pfid_iflags; /* PFI_AFLAG_* */
+};
+
+/*
+ * Address manipulation macros
+ */
+#define HTONL(x) (x) = htonl((__uint32_t)(x))
+#define HTONS(x) (x) = htons((__uint16_t)(x))
+#define NTOHL(x) (x) = ntohl((__uint32_t)(x))
+#define NTOHS(x) (x) = ntohs((__uint16_t)(x))
+
+#define PF_NAME "pf"
+
+#define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED)
+#define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock)
+#define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock)
+
+#define PF_STATE_LOCK(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \
+ PF_HASHROW_LOCK(_ih); \
+ } while (0)
+
+#define PF_STATE_UNLOCK(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))]; \
+ PF_HASHROW_UNLOCK(_ih); \
+ } while (0)
+
+#ifdef INVARIANTS
+#define PF_STATE_LOCK_ASSERT(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \
+ PF_HASHROW_ASSERT(_ih); \
+ } while (0)
+#else /* !INVARIANTS */
+#define PF_STATE_LOCK_ASSERT(s) do {} while (0)
+#endif /* INVARIANTS */
+
+extern struct mtx pf_unlnkdrules_mtx;
+#define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx)
+#define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx)
+
+extern struct rwlock pf_rules_lock;
+#define PF_RULES_RLOCK() rw_rlock(&pf_rules_lock)
+#define PF_RULES_RUNLOCK() rw_runlock(&pf_rules_lock)
+#define PF_RULES_WLOCK() rw_wlock(&pf_rules_lock)
+#define PF_RULES_WUNLOCK() rw_wunlock(&pf_rules_lock)
+#define PF_RULES_ASSERT() rw_assert(&pf_rules_lock, RA_LOCKED)
+#define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED)
+#define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED)
+
+#define PF_MODVER 1
+#define PFLOG_MODVER 1
+#define PFSYNC_MODVER 1
+
+#define PFLOG_MINVER 1
+#define PFLOG_PREFVER PFLOG_MODVER
+#define PFLOG_MAXVER 1
+#define PFSYNC_MINVER 1
+#define PFSYNC_PREFVER PFSYNC_MODVER
+#define PFSYNC_MAXVER 1
+
+#ifdef INET
+#ifndef INET6
+#define PF_INET_ONLY
+#endif /* ! INET6 */
+#endif /* INET */
+
+#ifdef INET6
+#ifndef INET
+#define PF_INET6_ONLY
+#endif /* ! INET */
+#endif /* INET6 */
+
+#ifdef INET
+#ifdef INET6
+#define PF_INET_INET6
+#endif /* INET6 */
+#endif /* INET */
+
+#else
+
+#define PF_INET_INET6
+
+#endif /* _KERNEL */
+
+/* Both IPv4 and IPv6 */
+#ifdef PF_INET_INET6
+
+#define PF_AEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \
+ (c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0])) \
+
+#define PF_ANEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \
+ (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[3] != (b)->addr32[3]))) \
+
+#define PF_AZERO(a, c) \
+ ((c == AF_INET && !(a)->addr32[0]) || \
+ (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \
+ !(a)->addr32[2] && !(a)->addr32[3] )) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv6 */
+
+#ifdef PF_INET6_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0]) \
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[3] != (b)->addr32[3] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[0] != (b)->addr32[0]) \
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0] && \
+ !(a)->addr32[1] && \
+ !(a)->addr32[2] && \
+ !(a)->addr32[3] ) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv4 */
+#ifdef PF_INET_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[0] == (b)->addr32[0])
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[0] != (b)->addr32[0])
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0])
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ (a)->v4.s_addr = (b)->v4.s_addr
+
+#define PF_AINC(a, f) \
+ do { \
+ (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \
+ } while (0)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ do { \
+ (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \
+ (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \
+ } while (0)
+
+#endif /* PF_INET_ONLY */
+#endif /* PF_INET6_ONLY */
+#endif /* PF_INET_INET6 */
+
+/*
+ * XXX callers not FIB-aware in our version of pf yet.
+ * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio.
+ */
+#define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \
+ ( \
+ (((aw)->type == PF_ADDR_NOROUTE && \
+ pf_routable((x), (af), NULL, (rtid))) || \
+ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \
+ pf_routable((x), (af), (ifp), (rtid))) || \
+ ((aw)->type == PF_ADDR_TABLE && \
+ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \
+ ((aw)->type == PF_ADDR_DYNIFTL && \
+ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \
+ ((aw)->type == PF_ADDR_RANGE && \
+ !pf_match_addr_range(&(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))) || \
+ ((aw)->type == PF_ADDR_ADDRMASK && \
+ !PF_AZERO(&(aw)->v.a.mask, (af)) && \
+ !PF_MATCHA(0, &(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))))) != \
+ (neg) \
+ )
+
+
+struct pf_rule_uid {
+ uid_t uid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_gid {
+ uid_t gid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_addr {
+ struct pf_addr_wrap addr;
+ u_int16_t port[2];
+ u_int8_t neg;
+ u_int8_t port_op;
+};
+
+struct pf_pooladdr {
+ struct pf_addr_wrap addr;
+ TAILQ_ENTRY(pf_pooladdr) entries;
+ char ifname[IFNAMSIZ];
+ struct pfi_kif *kif;
+};
+
+TAILQ_HEAD(pf_palist, pf_pooladdr);
+
+struct pf_poolhashkey {
+ union {
+ u_int8_t key8[16];
+ u_int16_t key16[8];
+ u_int32_t key32[4];
+ } pfk; /* 128-bit hash key */
+#define key8 pfk.key8
+#define key16 pfk.key16
+#define key32 pfk.key32
+};
+
+struct pf_pool {
+ struct pf_palist list;
+ struct pf_pooladdr *cur;
+ struct pf_poolhashkey key;
+ struct pf_addr counter;
+ int tblidx;
+ u_int16_t proxy_port[2];
+ u_int8_t opts;
+};
+
+
+/* A packed Operating System description for fingerprinting */
+typedef u_int32_t pf_osfp_t;
+#define PF_OSFP_ANY ((pf_osfp_t)0)
+#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1)
+#define PF_OSFP_NOMATCH ((pf_osfp_t)-2)
+
+struct pf_osfp_entry {
+ SLIST_ENTRY(pf_osfp_entry) fp_entry;
+ pf_osfp_t fp_os;
+ int fp_enflags;
+#define PF_OSFP_EXPANDED 0x001 /* expanded entry */
+#define PF_OSFP_GENERIC 0x002 /* generic signature */
+#define PF_OSFP_NODETAIL 0x004 /* no p0f details */
+#define PF_OSFP_LEN 32
+ char fp_class_nm[PF_OSFP_LEN];
+ char fp_version_nm[PF_OSFP_LEN];
+ char fp_subtype_nm[PF_OSFP_LEN];
+};
+#define PF_OSFP_ENTRY_EQ(a, b) \
+ ((a)->fp_os == (b)->fp_os && \
+ memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0)
+
+/* handle pf_osfp_t packing */
+#define _FP_RESERVED_BIT 1 /* For the special negative #defines */
+#define _FP_UNUSED_BITS 1
+#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */
+#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */
+#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */
+#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \
+ (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \
+ ((1 << _FP_CLASS_BITS) - 1); \
+ (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \
+ ((1 << _FP_VERSION_BITS) - 1);\
+ (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+#define PF_OSFP_PACK(osfp, class, version, subtype) do { \
+ (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \
+ + _FP_SUBTYPE_BITS); \
+ (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \
+ _FP_SUBTYPE_BITS; \
+ (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+
+/* the fingerprint of an OS's TCP SYN packet */
+typedef u_int64_t pf_tcpopts_t;
+struct pf_os_fingerprint {
+ SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */
+#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */
+#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */
+#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */
+#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */
+#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */
+#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */
+#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */
+#define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale dont-care */
+#define PF_OSFP_MSS 0x0200 /* TCP MSS */
+#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */
+#define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */
+#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */
+#define PF_OSFP_TS0 0x2000 /* Zero timestamp */
+#define PF_OSFP_INET6 0x4000 /* IPv6 */
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+#define PF_OSFP_MAXTTL_OFFSET 40
+/* TCP options packing */
+#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */
+#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */
+#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */
+#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */
+#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */
+#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */
+#define PF_OSFP_MAX_OPTS \
+ (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \
+ / PF_OSFP_TCPOPT_BITS
+
+ SLIST_ENTRY(pf_os_fingerprint) fp_next;
+};
+
+struct pf_osfp_ioctl {
+ struct pf_osfp_entry fp_os;
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+
+ int fp_getnum; /* DIOCOSFPGET number */
+};
+
+
+union pf_rule_ptr {
+ struct pf_rule *ptr;
+ u_int32_t nr;
+};
+
+#define PF_ANCHOR_NAME_SIZE 64
+
+struct pf_rule {
+ struct pf_rule_addr src;
+ struct pf_rule_addr dst;
+#define PF_SKIP_IFP 0
+#define PF_SKIP_DIR 1
+#define PF_SKIP_AF 2
+#define PF_SKIP_PROTO 3
+#define PF_SKIP_SRC_ADDR 4
+#define PF_SKIP_SRC_PORT 5
+#define PF_SKIP_DST_ADDR 6
+#define PF_SKIP_DST_PORT 7
+#define PF_SKIP_COUNT 8
+ union pf_rule_ptr skip[PF_SKIP_COUNT];
+#define PF_RULE_LABEL_SIZE 64
+ char label[PF_RULE_LABEL_SIZE];
+ char ifname[IFNAMSIZ];
+ char qname[PF_QNAME_SIZE];
+ char pqname[PF_QNAME_SIZE];
+#define PF_TAG_NAME_SIZE 64
+ char tagname[PF_TAG_NAME_SIZE];
+ char match_tagname[PF_TAG_NAME_SIZE];
+
+ char overload_tblname[PF_TABLE_NAME_SIZE];
+
+ TAILQ_ENTRY(pf_rule) entries;
+ struct pf_pool rpool;
+
+ u_int64_t evaluations;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+
+ struct pfi_kif *kif;
+ struct pf_anchor *anchor;
+ struct pfr_ktable *overload_tbl;
+
+ pf_osfp_t os_fingerprint;
+
+ int rtableid;
+ u_int32_t timeout[PFTM_MAX];
+ u_int32_t max_states;
+ u_int32_t max_src_nodes;
+ u_int32_t max_src_states;
+ u_int32_t max_src_conn;
+ struct {
+ u_int32_t limit;
+ u_int32_t seconds;
+ } max_src_conn_rate;
+ u_int32_t qid;
+ u_int32_t pqid;
+ u_int32_t rt_listid;
+ u_int32_t nr;
+ u_int32_t prob;
+ uid_t cuid;
+ pid_t cpid;
+
+ counter_u64_t states_cur;
+ counter_u64_t states_tot;
+ counter_u64_t src_nodes;
+
+ u_int16_t return_icmp;
+ u_int16_t return_icmp6;
+ u_int16_t max_mss;
+ u_int16_t tag;
+ u_int16_t match_tag;
+ u_int16_t spare2; /* netgraph */
+
+ struct pf_rule_uid uid;
+ struct pf_rule_gid gid;
+
+ u_int32_t rule_flag;
+ u_int8_t action;
+ u_int8_t direction;
+ u_int8_t log;
+ u_int8_t logif;
+ u_int8_t quick;
+ u_int8_t ifnot;
+ u_int8_t match_tag_not;
+ u_int8_t natpass;
+
+#define PF_STATE_NORMAL 0x1
+#define PF_STATE_MODULATE 0x2
+#define PF_STATE_SYNPROXY 0x3
+ u_int8_t keep_state;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t type;
+ u_int8_t code;
+ u_int8_t flags;
+ u_int8_t flagset;
+ u_int8_t min_ttl;
+ u_int8_t allow_opts;
+ u_int8_t rt;
+ u_int8_t return_ttl;
+ u_int8_t tos;
+ u_int8_t set_tos;
+ u_int8_t anchor_relative;
+ u_int8_t anchor_wildcard;
+
+#define PF_FLUSH 0x01
+#define PF_FLUSH_GLOBAL 0x02
+ u_int8_t flush;
+
+ struct {
+ struct pf_addr addr;
+ u_int16_t port;
+ } divert;
+
+ uint64_t u_states_cur;
+ uint64_t u_states_tot;
+ uint64_t u_src_nodes;
+};
+
+/* rule flags */
+#define PFRULE_DROP 0x0000
+#define PFRULE_RETURNRST 0x0001
+#define PFRULE_FRAGMENT 0x0002
+#define PFRULE_RETURNICMP 0x0004
+#define PFRULE_RETURN 0x0008
+#define PFRULE_NOSYNC 0x0010
+#define PFRULE_SRCTRACK 0x0020 /* track source states */
+#define PFRULE_RULESRCTRACK 0x0040 /* per rule */
+#define PFRULE_REFS 0x0080 /* rule has references */
+
+/* scrub flags */
+#define PFRULE_NODF 0x0100
+#define PFRULE_FRAGCROP 0x0200 /* non-buffering frag cache */
+#define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */
+#define PFRULE_RANDOMID 0x0800
+#define PFRULE_REASSEMBLE_TCP 0x1000
+#define PFRULE_SET_TOS 0x2000
+
+/* rule flags again */
+#define PFRULE_IFBOUND 0x00010000 /* if-bound */
+#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */
+
+#define PFSTATE_HIWAT 10000 /* default state table size */
+#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */
+#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */
+
+
+struct pf_threshold {
+ u_int32_t limit;
+#define PF_THRESHOLD_MULT 1000
+#define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT
+ u_int32_t seconds;
+ u_int32_t count;
+ u_int32_t last;
+};
+
+struct pf_src_node {
+ LIST_ENTRY(pf_src_node) entry;
+ struct pf_addr addr;
+ struct pf_addr raddr;
+ union pf_rule_ptr rule;
+ struct pfi_kif *kif;
+ u_int64_t bytes[2];
+ u_int64_t packets[2];
+ u_int32_t states;
+ u_int32_t conn;
+ struct pf_threshold conn_rate;
+ u_int32_t creation;
+ u_int32_t expire;
+ sa_family_t af;
+ u_int8_t ruletype;
+};
+
+#define PFSNODE_HIWAT 10000 /* default source node table size */
+
+struct pf_state_scrub {
+ struct timeval pfss_last; /* time received last packet */
+ u_int32_t pfss_tsecr; /* last echoed timestamp */
+ u_int32_t pfss_tsval; /* largest timestamp */
+ u_int32_t pfss_tsval0; /* original timestamp */
+ u_int16_t pfss_flags;
+#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */
+#define PFSS_PAWS 0x0010 /* stricter PAWS checks */
+#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */
+#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */
+#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */
+ u_int8_t pfss_ttl; /* stashed TTL */
+ u_int8_t pad;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+};
+
+struct pf_state_host {
+ struct pf_addr addr;
+ u_int16_t port;
+ u_int16_t pad;
+};
+
+struct pf_state_peer {
+ struct pf_state_scrub *scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */
+ u_int8_t pad[1];
+};
+
+/* Keep synced with struct pf_state_key. */
+struct pf_state_key_cmp {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t pad[2];
+};
+
+struct pf_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t pad[2];
+
+ LIST_ENTRY(pf_state_key) entry;
+ TAILQ_HEAD(, pf_state) states[2];
+};
+
+/* Keep synced with struct pf_state. */
+struct pf_state_cmp {
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+ u_int8_t pad[3];
+};
+
+struct pf_state {
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+ u_int8_t pad[3];
+
+ u_int refs;
+ TAILQ_ENTRY(pf_state) sync_list;
+ TAILQ_ENTRY(pf_state) key_list[2];
+ LIST_ENTRY(pf_state) entry;
+ struct pf_state_peer src;
+ struct pf_state_peer dst;
+ union pf_rule_ptr rule;
+ union pf_rule_ptr anchor;
+ union pf_rule_ptr nat_rule;
+ struct pf_addr rt_addr;
+ struct pf_state_key *key[2]; /* addresses stack and wire */
+ struct pfi_kif *kif;
+ struct pfi_kif *rt_kif;
+ struct pf_src_node *src_node;
+ struct pf_src_node *nat_src_node;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t pfsync_time;
+ u_int16_t tag;
+ u_int8_t log;
+ u_int8_t state_flags;
+#define PFSTATE_ALLOWOPTS 0x01
+#define PFSTATE_SLOPPY 0x02
+/* was PFSTATE_PFLOW 0x04 */
+#define PFSTATE_NOSYNC 0x08
+#define PFSTATE_ACK 0x10
+ u_int8_t timeout;
+ u_int8_t sync_state; /* PFSYNC_S_x */
+
+ /* XXX */
+ u_int8_t sync_updates;
+ u_int8_t _tail[3];
+};
+
+/*
+ * Unified state structures for pulling states out of the kernel
+ * used by pfsync(4) and the pf(4) ioctl.
+ */
+struct pfsync_state_scrub {
+ u_int16_t pfss_flags;
+ u_int8_t pfss_ttl; /* stashed TTL */
+#define PFSYNC_SCRUB_FLAG_VALID 0x01
+ u_int8_t scrub_flag;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+} __packed;
+
+struct pfsync_state_peer {
+ struct pfsync_state_scrub scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t pad[6];
+} __packed;
+
+struct pfsync_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+};
+
+struct pfsync_state {
+ u_int64_t id;
+ char ifname[IFNAMSIZ];
+ struct pfsync_state_key key[2];
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ struct pf_addr rt_addr;
+ u_int32_t rule;
+ u_int32_t anchor;
+ u_int32_t nat_rule;
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t packets[2][2];
+ u_int32_t bytes[2][2];
+ u_int32_t creatorid;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+ u_int8_t __spare[2];
+ u_int8_t log;
+ u_int8_t state_flags;
+ u_int8_t timeout;
+ u_int8_t sync_flags;
+ u_int8_t updates;
+} __packed;
+
+#ifdef _KERNEL
+/* pfsync */
+typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t);
+typedef void pfsync_insert_state_t(struct pf_state *);
+typedef void pfsync_update_state_t(struct pf_state *);
+typedef void pfsync_delete_state_t(struct pf_state *);
+typedef void pfsync_clear_states_t(u_int32_t, const char *);
+typedef int pfsync_defer_t(struct pf_state *, struct mbuf *);
+
+extern pfsync_state_import_t *pfsync_state_import_ptr;
+extern pfsync_insert_state_t *pfsync_insert_state_ptr;
+extern pfsync_update_state_t *pfsync_update_state_ptr;
+extern pfsync_delete_state_t *pfsync_delete_state_ptr;
+extern pfsync_clear_states_t *pfsync_clear_states_ptr;
+extern pfsync_defer_t *pfsync_defer_ptr;
+
+void pfsync_state_export(struct pfsync_state *,
+ struct pf_state *);
+
+/* pflog */
+struct pf_ruleset;
+struct pf_pdesc;
+typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
+ u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
+ struct pf_ruleset *, struct pf_pdesc *, int);
+extern pflog_packet_t *pflog_packet_ptr;
+
+#define V_pf_end_threads VNET(pf_end_threads)
+#endif /* _KERNEL */
+
+#define PFSYNC_FLAG_SRCNODE 0x04
+#define PFSYNC_FLAG_NATSRCNODE 0x08
+
+/* for copies to/from network byte order */
+/* ioctl interface also uses network byte order */
+#define pf_state_peer_hton(s,d) do { \
+ (d)->seqlo = htonl((s)->seqlo); \
+ (d)->seqhi = htonl((s)->seqhi); \
+ (d)->seqdiff = htonl((s)->seqdiff); \
+ (d)->max_win = htons((s)->max_win); \
+ (d)->mss = htons((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub) { \
+ (d)->scrub.pfss_flags = \
+ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \
+ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \
+ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
+ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \
+ } \
+} while (0)
+
+#define pf_state_peer_ntoh(s,d) do { \
+ (d)->seqlo = ntohl((s)->seqlo); \
+ (d)->seqhi = ntohl((s)->seqhi); \
+ (d)->seqdiff = ntohl((s)->seqdiff); \
+ (d)->max_win = ntohs((s)->max_win); \
+ (d)->mss = ntohs((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \
+ (d)->scrub != NULL) { \
+ (d)->scrub->pfss_flags = \
+ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \
+ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \
+ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
+ } \
+} while (0)
+
+#define pf_state_counter_hton(s,d) do { \
+ d[0] = htonl((s>>32)&0xffffffff); \
+ d[1] = htonl(s&0xffffffff); \
+} while (0)
+
+#define pf_state_counter_from_pfsync(s) \
+ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1]))
+
+#define pf_state_counter_ntoh(s,d) do { \
+ d = ntohl(s[0]); \
+ d = d<<32; \
+ d += ntohl(s[1]); \
+} while (0)
+
+TAILQ_HEAD(pf_rulequeue, pf_rule);
+
+struct pf_anchor;
+
+struct pf_ruleset {
+ struct {
+ struct pf_rulequeue queues[2];
+ struct {
+ struct pf_rulequeue *ptr;
+ struct pf_rule **ptr_array;
+ u_int32_t rcount;
+ u_int32_t ticket;
+ int open;
+ } active, inactive;
+ } rules[PF_RULESET_MAX];
+ struct pf_anchor *anchor;
+ u_int32_t tticket;
+ int tables;
+ int topen;
+};
+
+RB_HEAD(pf_anchor_global, pf_anchor);
+RB_HEAD(pf_anchor_node, pf_anchor);
+struct pf_anchor {
+ RB_ENTRY(pf_anchor) entry_global;
+ RB_ENTRY(pf_anchor) entry_node;
+ struct pf_anchor *parent;
+ struct pf_anchor_node children;
+ char name[PF_ANCHOR_NAME_SIZE];
+ char path[MAXPATHLEN];
+ struct pf_ruleset ruleset;
+ int refcnt; /* anchor rules */
+ int match; /* XXX: used for pfctl black magic */
+};
+RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
+RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
+
+#define PF_RESERVED_ANCHOR "_pf"
+
+#define PFR_TFLAG_PERSIST 0x00000001
+#define PFR_TFLAG_CONST 0x00000002
+#define PFR_TFLAG_ACTIVE 0x00000004
+#define PFR_TFLAG_INACTIVE 0x00000008
+#define PFR_TFLAG_REFERENCED 0x00000010
+#define PFR_TFLAG_REFDANCHOR 0x00000020
+#define PFR_TFLAG_COUNTERS 0x00000040
+/* Adjust masks below when adding flags. */
+#define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \
+ PFR_TFLAG_CONST | \
+ PFR_TFLAG_COUNTERS)
+#define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \
+ PFR_TFLAG_INACTIVE | \
+ PFR_TFLAG_REFERENCED | \
+ PFR_TFLAG_REFDANCHOR)
+#define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \
+ PFR_TFLAG_CONST | \
+ PFR_TFLAG_ACTIVE | \
+ PFR_TFLAG_INACTIVE | \
+ PFR_TFLAG_REFERENCED | \
+ PFR_TFLAG_REFDANCHOR | \
+ PFR_TFLAG_COUNTERS)
+
+struct pf_anchor_stackframe;
+
+struct pfr_table {
+ char pfrt_anchor[MAXPATHLEN];
+ char pfrt_name[PF_TABLE_NAME_SIZE];
+ u_int32_t pfrt_flags;
+ u_int8_t pfrt_fback;
+};
+
+enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED,
+ PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE,
+ PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX };
+
+struct pfr_addr {
+ union {
+ struct in_addr _pfra_ip4addr;
+ struct in6_addr _pfra_ip6addr;
+ } pfra_u;
+ u_int8_t pfra_af;
+ u_int8_t pfra_net;
+ u_int8_t pfra_not;
+ u_int8_t pfra_fback;
+};
+#define pfra_ip4addr pfra_u._pfra_ip4addr
+#define pfra_ip6addr pfra_u._pfra_ip6addr
+
+enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX };
+enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX };
+#define PFR_OP_XPASS PFR_OP_ADDR_MAX
+
+struct pfr_astats {
+ struct pfr_addr pfras_a;
+ u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ long pfras_tzero;
+};
+
+enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX };
+
+struct pfr_tstats {
+ struct pfr_table pfrts_t;
+ u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_match;
+ u_int64_t pfrts_nomatch;
+ long pfrts_tzero;
+ int pfrts_cnt;
+ int pfrts_refcnt[PFR_REFCNT_MAX];
+};
+#define pfrts_name pfrts_t.pfrt_name
+#define pfrts_flags pfrts_t.pfrt_flags
+
+#ifndef _SOCKADDR_UNION_DEFINED
+#define _SOCKADDR_UNION_DEFINED
+union sockaddr_union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+};
+#endif /* _SOCKADDR_UNION_DEFINED */
+
+struct pfr_kcounters {
+ u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+};
+
+SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
+struct pfr_kentry {
+ struct radix_node pfrke_node[2];
+ union sockaddr_union pfrke_sa;
+ SLIST_ENTRY(pfr_kentry) pfrke_workq;
+ struct pfr_kcounters *pfrke_counters;
+ long pfrke_tzero;
+ u_int8_t pfrke_af;
+ u_int8_t pfrke_net;
+ u_int8_t pfrke_not;
+ u_int8_t pfrke_mark;
+};
+
+SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
+RB_HEAD(pfr_ktablehead, pfr_ktable);
+struct pfr_ktable {
+ struct pfr_tstats pfrkt_ts;
+ RB_ENTRY(pfr_ktable) pfrkt_tree;
+ SLIST_ENTRY(pfr_ktable) pfrkt_workq;
+ struct radix_node_head *pfrkt_ip4;
+ struct radix_node_head *pfrkt_ip6;
+ struct pfr_ktable *pfrkt_shadow;
+ struct pfr_ktable *pfrkt_root;
+ struct pf_ruleset *pfrkt_rs;
+ long pfrkt_larg;
+ int pfrkt_nflags;
+};
+#define pfrkt_t pfrkt_ts.pfrts_t
+#define pfrkt_name pfrkt_t.pfrt_name
+#define pfrkt_anchor pfrkt_t.pfrt_anchor
+#define pfrkt_ruleset pfrkt_t.pfrt_ruleset
+#define pfrkt_flags pfrkt_t.pfrt_flags
+#define pfrkt_cnt pfrkt_ts.pfrts_cnt
+#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt
+#define pfrkt_packets pfrkt_ts.pfrts_packets
+#define pfrkt_bytes pfrkt_ts.pfrts_bytes
+#define pfrkt_match pfrkt_ts.pfrts_match
+#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch
+#define pfrkt_tzero pfrkt_ts.pfrts_tzero
+
+/* keep synced with pfi_kif, used in RB_FIND */
+struct pfi_kif_cmp {
+ char pfik_name[IFNAMSIZ];
+};
+
+struct pfi_kif {
+ char pfik_name[IFNAMSIZ];
+ union {
+ RB_ENTRY(pfi_kif) _pfik_tree;
+ LIST_ENTRY(pfi_kif) _pfik_list;
+ } _pfik_glue;
+#define pfik_tree _pfik_glue._pfik_tree
+#define pfik_list _pfik_glue._pfik_list
+ u_int64_t pfik_packets[2][2][2];
+ u_int64_t pfik_bytes[2][2][2];
+ u_int32_t pfik_tzero;
+ u_int pfik_flags;
+ struct ifnet *pfik_ifp;
+ struct ifg_group *pfik_group;
+ u_int pfik_rulerefs;
+ TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs;
+};
+
+#define PFI_IFLAG_REFS 0x0001 /* has state references */
+#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */
+
+struct pf_pdesc {
+ struct {
+ int done;
+ uid_t uid;
+ gid_t gid;
+ } lookup;
+ u_int64_t tot_len; /* Make Mickey money */
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct icmp *icmp;
+#ifdef INET6
+ struct icmp6_hdr *icmp6;
+#endif /* INET6 */
+ void *any;
+ } hdr;
+
+ struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */
+ struct pf_addr *src; /* src address */
+ struct pf_addr *dst; /* dst address */
+ u_int16_t *sport;
+ u_int16_t *dport;
+ struct pf_mtag *pf_mtag;
+
+ u_int32_t p_len; /* total length of payload */
+
+ u_int16_t *ip_sum;
+ u_int16_t *proto_sum;
+ u_int16_t flags; /* Let SCRUB trigger behavior in
+ * state code. Easier than tags */
+#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */
+#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t tos;
+ u_int8_t dir; /* direction */
+ u_int8_t sidx; /* key index for source */
+ u_int8_t didx; /* key index for destination */
+};
+
+/* flags for RDR options */
+#define PF_DPORT_RANGE 0x01 /* Dest port uses range */
+#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */
+
+/* UDP state enumeration */
+#define PFUDPS_NO_TRAFFIC 0
+#define PFUDPS_SINGLE 1
+#define PFUDPS_MULTIPLE 2
+
+#define PFUDPS_NSTATES 3 /* number of state levels */
+
+#define PFUDPS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+/* Other protocol state enumeration */
+#define PFOTHERS_NO_TRAFFIC 0
+#define PFOTHERS_SINGLE 1
+#define PFOTHERS_MULTIPLE 2
+
+#define PFOTHERS_NSTATES 3 /* number of state levels */
+
+#define PFOTHERS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+#define ACTION_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ } while (0)
+
+#define REASON_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ if (x < PFRES_MAX) \
+ counter_u64_add(V_pf_status.counters[x], 1); \
+ } while (0)
+
+struct pf_kstatus {
+ counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */
+ counter_u64_t lcounters[LCNT_MAX]; /* limit counters */
+ counter_u64_t fcounters[FCNT_MAX]; /* state operation counters */
+ counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */
+ uint32_t states;
+ uint32_t src_nodes;
+ uint32_t running;
+ uint32_t since;
+ uint32_t debug;
+ uint32_t hostid;
+ char ifname[IFNAMSIZ];
+ uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
+};
+
+struct pf_divert {
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ } addr;
+ u_int16_t port;
+};
+
+#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */
+#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */
+
+/*
+ * ioctl parameter structures
+ */
+
+struct pfioc_pooladdr {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ u_int32_t r_num;
+ u_int8_t r_action;
+ u_int8_t r_last;
+ u_int8_t af;
+ char anchor[MAXPATHLEN];
+ struct pf_pooladdr addr;
+};
+
+struct pfioc_rule {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t pool_ticket;
+ u_int32_t nr;
+ char anchor[MAXPATHLEN];
+ char anchor_call[MAXPATHLEN];
+ struct pf_rule rule;
+};
+
+struct pfioc_natlook {
+ struct pf_addr saddr;
+ struct pf_addr daddr;
+ struct pf_addr rsaddr;
+ struct pf_addr rdaddr;
+ u_int16_t sport;
+ u_int16_t dport;
+ u_int16_t rsport;
+ u_int16_t rdport;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+};
+
+struct pfioc_state {
+ struct pfsync_state state;
+};
+
+struct pfioc_src_node_kill {
+ sa_family_t psnk_af;
+ struct pf_rule_addr psnk_src;
+ struct pf_rule_addr psnk_dst;
+ u_int psnk_killed;
+};
+
+struct pfioc_state_kill {
+ struct pf_state_cmp psk_pfcmp;
+ sa_family_t psk_af;
+ int psk_proto;
+ struct pf_rule_addr psk_src;
+ struct pf_rule_addr psk_dst;
+ char psk_ifname[IFNAMSIZ];
+ char psk_label[PF_RULE_LABEL_SIZE];
+ u_int psk_killed;
+};
+
+struct pfioc_states {
+ int ps_len;
+ union {
+ caddr_t psu_buf;
+ struct pfsync_state *psu_states;
+ } ps_u;
+#define ps_buf ps_u.psu_buf
+#define ps_states ps_u.psu_states
+};
+
+struct pfioc_src_nodes {
+ int psn_len;
+ union {
+ caddr_t psu_buf;
+ struct pf_src_node *psu_src_nodes;
+ } psn_u;
+#define psn_buf psn_u.psu_buf
+#define psn_src_nodes psn_u.psu_src_nodes
+};
+
+struct pfioc_if {
+ char ifname[IFNAMSIZ];
+};
+
+struct pfioc_tm {
+ int timeout;
+ int seconds;
+};
+
+struct pfioc_limit {
+ int index;
+ unsigned limit;
+};
+
+struct pfioc_altq {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ struct pf_altq altq;
+};
+
+struct pfioc_qstats {
+ u_int32_t ticket;
+ u_int32_t nr;
+ void *buf;
+ int nbytes;
+ u_int8_t scheduler;
+};
+
+struct pfioc_ruleset {
+ u_int32_t nr;
+ char path[MAXPATHLEN];
+ char name[PF_ANCHOR_NAME_SIZE];
+};
+
+#define PF_RULESET_ALTQ (PF_RULESET_MAX)
+#define PF_RULESET_TABLE (PF_RULESET_MAX+1)
+struct pfioc_trans {
+ int size; /* number of elements */
+ int esize; /* size of each element in bytes */
+ struct pfioc_trans_e {
+ int rs_num;
+ char anchor[MAXPATHLEN];
+ u_int32_t ticket;
+ } *array;
+};
+
+#define PFR_FLAG_ATOMIC 0x00000001 /* unused */
+#define PFR_FLAG_DUMMY 0x00000002
+#define PFR_FLAG_FEEDBACK 0x00000004
+#define PFR_FLAG_CLSTATS 0x00000008
+#define PFR_FLAG_ADDRSTOO 0x00000010
+#define PFR_FLAG_REPLACE 0x00000020
+#define PFR_FLAG_ALLRSETS 0x00000040
+#define PFR_FLAG_ALLMASK 0x0000007F
+#ifdef _KERNEL
+#define PFR_FLAG_USERIOCTL 0x10000000
+#endif
+
+struct pfioc_table {
+ struct pfr_table pfrio_table;
+ void *pfrio_buffer;
+ int pfrio_esize;
+ int pfrio_size;
+ int pfrio_size2;
+ int pfrio_nadd;
+ int pfrio_ndel;
+ int pfrio_nchange;
+ int pfrio_flags;
+ u_int32_t pfrio_ticket;
+};
+#define pfrio_exists pfrio_nadd
+#define pfrio_nzero pfrio_nadd
+#define pfrio_nmatch pfrio_nadd
+#define pfrio_naddr pfrio_size2
+#define pfrio_setflag pfrio_size2
+#define pfrio_clrflag pfrio_nadd
+
+struct pfioc_iface {
+ char pfiio_name[IFNAMSIZ];
+ void *pfiio_buffer;
+ int pfiio_esize;
+ int pfiio_size;
+ int pfiio_nzero;
+ int pfiio_flags;
+};
+
+
+/*
+ * ioctl operations
+ */
+
+#define DIOCSTART _IO ('D', 1)
+#define DIOCSTOP _IO ('D', 2)
+#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule)
+#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule)
+#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule)
+/* XXX cut 8 - 17 */
+#define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill)
+#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state)
+#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if)
+#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status)
+#define DIOCCLRSTATUS _IO ('D', 22)
+#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook)
+#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t)
+#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states)
+#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule)
+/* XXX cut 26 - 28 */
+#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm)
+#define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm)
+#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state)
+#define DIOCCLRRULECTRS _IO ('D', 38)
+#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit)
+#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit)
+#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill)
+#define DIOCSTARTALTQ _IO ('D', 42)
+#define DIOCSTOPALTQ _IO ('D', 43)
+#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq)
+#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq)
+#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq)
+#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq)
+#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats)
+#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr)
+#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr)
+#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr)
+#define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr)
+#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr)
+/* XXX cut 55 - 57 */
+#define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset)
+#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset)
+#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table)
+#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table)
+#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table)
+#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table)
+#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table)
+#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table)
+#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table)
+#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table)
+#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table)
+#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table)
+#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table)
+#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table)
+#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table)
+#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table)
+#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table)
+#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table)
+#define DIOCOSFPFLUSH _IO('D', 78)
+#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl)
+#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl)
+#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans)
+#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans)
+#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans)
+#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes)
+#define DIOCCLRSRCNODES _IO('D', 85)
+#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t)
+#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface)
+#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface)
+#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface)
+#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
+struct pf_ifspeed {
+ char ifname[IFNAMSIZ];
+ u_int32_t baudrate;
+};
+#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed)
+
+#ifdef _KERNEL
+LIST_HEAD(pf_src_node_list, pf_src_node);
+struct pf_srchash {
+ struct pf_src_node_list nodes;
+ struct mtx lock;
+};
+
+struct pf_keyhash {
+ LIST_HEAD(, pf_state_key) keys;
+ struct mtx lock;
+};
+
+struct pf_idhash {
+ LIST_HEAD(, pf_state) states;
+ struct mtx lock;
+};
+
+extern u_long pf_hashmask;
+extern u_long pf_srchashmask;
+#define PF_HASHSIZ (32768)
+#define PF_SRCHASHSIZ (PF_HASHSIZ/4)
+VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
+VNET_DECLARE(struct pf_idhash *, pf_idhash);
+#define V_pf_keyhash VNET(pf_keyhash)
+#define V_pf_idhash VNET(pf_idhash)
+VNET_DECLARE(struct pf_srchash *, pf_srchash);
+#define V_pf_srchash VNET(pf_srchash)
+
+#define PF_IDHASH(s) (be64toh((s)->id) % (pf_hashmask + 1))
+
+VNET_DECLARE(void *, pf_swi_cookie);
+#define V_pf_swi_cookie VNET(pf_swi_cookie)
+
+VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]);
+#define V_pf_stateid VNET(pf_stateid)
+
+TAILQ_HEAD(pf_altqqueue, pf_altq);
+VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]);
+#define V_pf_altqs VNET(pf_altqs)
+VNET_DECLARE(struct pf_palist, pf_pabuf);
+#define V_pf_pabuf VNET(pf_pabuf)
+
+VNET_DECLARE(u_int32_t, ticket_altqs_active);
+#define V_ticket_altqs_active VNET(ticket_altqs_active)
+VNET_DECLARE(u_int32_t, ticket_altqs_inactive);
+#define V_ticket_altqs_inactive VNET(ticket_altqs_inactive)
+VNET_DECLARE(int, altqs_inactive_open);
+#define V_altqs_inactive_open VNET(altqs_inactive_open)
+VNET_DECLARE(u_int32_t, ticket_pabuf);
+#define V_ticket_pabuf VNET(ticket_pabuf)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active);
+#define V_pf_altqs_active VNET(pf_altqs_active)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive);
+#define V_pf_altqs_inactive VNET(pf_altqs_inactive)
+
+VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules);
+#define V_pf_unlinked_rules VNET(pf_unlinked_rules)
+
+void pf_initialize(void);
+void pf_mtag_initialize(void);
+void pf_mtag_cleanup(void);
+void pf_cleanup(void);
+
+struct pf_mtag *pf_get_mtag(struct mbuf *);
+
+extern void pf_calc_skip_steps(struct pf_rulequeue *);
+#ifdef ALTQ
+extern void pf_altq_ifnet_event(struct ifnet *, int);
+#endif
+VNET_DECLARE(uma_zone_t, pf_state_z);
+#define V_pf_state_z VNET(pf_state_z)
+VNET_DECLARE(uma_zone_t, pf_state_key_z);
+#define V_pf_state_key_z VNET(pf_state_key_z)
+VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
+#define V_pf_state_scrub_z VNET(pf_state_scrub_z)
+
+extern void pf_purge_thread(void *);
+extern void pf_intr(void *);
+extern void pf_purge_expired_src_nodes(void);
+
+extern int pf_unlink_state(struct pf_state *, u_int);
+#define PF_ENTER_LOCKED 0x00000001
+#define PF_RETURN_LOCKED 0x00000002
+extern int pf_state_insert(struct pfi_kif *,
+ struct pf_state_key *,
+ struct pf_state_key *,
+ struct pf_state *);
+extern void pf_free_state(struct pf_state *);
+
+static __inline void
+pf_ref_state(struct pf_state *s)
+{
+
+ refcount_acquire(&s->refs);
+}
+
+static __inline int
+pf_release_state(struct pf_state *s)
+{
+
+ if (refcount_release(&s->refs)) {
+ pf_free_state(s);
+ return (1);
+ } else
+ return (0);
+}
+
+extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t);
+extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *,
+ u_int, int *);
+extern struct pf_src_node *pf_find_src_node(struct pf_addr *,
+ struct pf_rule *, sa_family_t, int);
+extern void pf_unlink_src_node(struct pf_src_node *);
+extern u_int pf_free_src_nodes(struct pf_src_node_list *);
+extern void pf_print_state(struct pf_state *);
+extern void pf_print_flags(u_int8_t);
+extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
+ u_int8_t);
+extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t,
+ u_int16_t, u_int16_t, u_int8_t);
+
+VNET_DECLARE(struct ifnet *, sync_ifp);
+#define V_sync_ifp VNET(sync_ifp);
+VNET_DECLARE(struct pf_rule, pf_default_rule);
+#define V_pf_default_rule VNET(pf_default_rule)
+extern void pf_addrcpy(struct pf_addr *, struct pf_addr *,
+ u_int8_t);
+void pf_free_rule(struct pf_rule *);
+
+#ifdef INET
+int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+#endif /* INET */
+
+#ifdef INET6
+int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+void pf_poolmask(struct pf_addr *, struct pf_addr*,
+ struct pf_addr *, struct pf_addr *, u_int8_t);
+void pf_addr_inc(struct pf_addr *, sa_family_t);
+int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *);
+#endif /* INET6 */
+
+u_int32_t pf_new_isn(struct pf_state *);
+void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
+ sa_family_t);
+void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
+void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
+ u_int8_t);
+void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t);
+void pf_send_deferred_syn(struct pf_state *);
+int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
+int pf_match_addr_range(struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
+int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
+
+void pf_normalize_init(void);
+void pf_normalize_cleanup(void);
+int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *,
+ struct pf_pdesc *);
+void pf_normalize_tcp_cleanup(struct pf_state *);
+int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *,
+ struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *);
+int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
+ u_short *, struct tcphdr *, struct pf_state *,
+ struct pf_state_peer *, struct pf_state_peer *, int *);
+u_int32_t
+ pf_state_expires(const struct pf_state *);
+void pf_purge_expired_fragments(void);
+int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,
+ int);
+int pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *);
+struct pf_state_key *pf_alloc_state_key(int);
+void pfr_initialize(void);
+void pfr_cleanup(void);
+int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
+void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
+ u_int64_t, int, int, int);
+int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t);
+void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
+struct pfr_ktable *
+ pfr_attach_table(struct pf_ruleset *, char *);
+void pfr_detach_table(struct pfr_ktable *);
+int pfr_clr_tables(struct pfr_table *, int *, int);
+int pfr_add_tables(struct pfr_table *, int, int *, int);
+int pfr_del_tables(struct pfr_table *, int, int *, int);
+int pfr_table_count(struct pfr_table *, int);
+int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int);
+int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int);
+int pfr_clr_tstats(struct pfr_table *, int, int *, int);
+int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int);
+int pfr_clr_addrs(struct pfr_table *, int *, int);
+int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long);
+int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, int *, int *, int, u_int32_t);
+int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int);
+int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int);
+int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int);
+int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int);
+int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int);
+int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, u_int32_t, int);
+
+MALLOC_DECLARE(PFI_MTYPE);
+VNET_DECLARE(struct pfi_kif *, pfi_all);
+#define V_pfi_all VNET(pfi_all)
+
+void pfi_initialize(void);
+void pfi_cleanup(void);
+void pfi_kif_ref(struct pfi_kif *);
+void pfi_kif_unref(struct pfi_kif *);
+struct pfi_kif *pfi_kif_find(const char *);
+struct pfi_kif *pfi_kif_attach(struct pfi_kif *, const char *);
+int pfi_kif_match(struct pfi_kif *, struct pfi_kif *);
+void pfi_kif_purge(void);
+int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *,
+ sa_family_t);
+int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t);
+void pfi_dynaddr_remove(struct pfi_dynaddr *);
+void pfi_dynaddr_copyout(struct pf_addr_wrap *);
+void pfi_update_status(const char *, struct pf_status *);
+void pfi_get_ifaces(const char *, struct pfi_kif *, int *);
+int pfi_set_flags(const char *, int);
+int pfi_clear_flags(const char *, int);
+
+int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int);
+int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int);
+int pf_addr_cmp(struct pf_addr *, struct pf_addr *,
+ sa_family_t);
+void pf_qid2qname(u_int32_t, char *);
+
+VNET_DECLARE(struct pf_kstatus, pf_status);
+#define V_pf_status VNET(pf_status)
+
+struct pf_limit {
+ uma_zone_t zone;
+ u_int limit;
+};
+VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
+#define V_pf_limits VNET(pf_limits)
+
+#endif /* _KERNEL */
+
+#ifdef _KERNEL
+VNET_DECLARE(struct pf_anchor_global, pf_anchors);
+#define V_pf_anchors VNET(pf_anchors)
+VNET_DECLARE(struct pf_anchor, pf_main_anchor);
+#define V_pf_main_anchor VNET(pf_main_anchor)
+#define pf_main_ruleset V_pf_main_anchor.ruleset
+#endif
+
+/* these ruleset functions can be linked into userland programs (pfctl) */
+int pf_get_ruleset_number(u_int8_t);
+void pf_init_ruleset(struct pf_ruleset *);
+int pf_anchor_setup(struct pf_rule *,
+ const struct pf_ruleset *, const char *);
+int pf_anchor_copyout(const struct pf_ruleset *,
+ const struct pf_rule *, struct pfioc_rule *);
+void pf_anchor_remove(struct pf_rule *);
+void pf_remove_if_empty_ruleset(struct pf_ruleset *);
+struct pf_ruleset *pf_find_ruleset(const char *);
+struct pf_ruleset *pf_find_or_create_ruleset(const char *);
+void pf_rs_initialize(void);
+
+/* The fingerprint functions can be linked into userland programs (tcpdump) */
+int pf_osfp_add(struct pf_osfp_ioctl *);
+#ifdef _KERNEL
+struct pf_osfp_enlist *
+ pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int,
+ const struct tcphdr *);
+#endif /* _KERNEL */
+void pf_osfp_flush(void);
+int pf_osfp_get(struct pf_osfp_ioctl *);
+int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
+
+#ifdef _KERNEL
+void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
+
+void pf_step_into_anchor(struct pf_anchor_stackframe *, int *,
+ struct pf_ruleset **, int, struct pf_rule **,
+ struct pf_rule **, int *);
+int pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *,
+ struct pf_ruleset **, int, struct pf_rule **,
+ struct pf_rule **, int *);
+
+int pf_map_addr(u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, struct pf_src_node **);
+struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *, struct pf_src_node **,
+ struct pf_state_key **, struct pf_state_key **,
+ struct pf_addr *, struct pf_addr *,
+ uint16_t, uint16_t, struct pf_anchor_stackframe *);
+
+struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *,
+ struct pf_addr *, u_int16_t, u_int16_t);
+struct pf_state_key *pf_state_key_clone(struct pf_state_key *);
+#endif /* _KERNEL */
+
+#endif /* _NET_PFVAR_H_ */
Property changes on: trunk/sys/net/pfvar.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/ppp_defs.h
===================================================================
--- trunk/sys/net/ppp_defs.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/ppp_defs.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -26,12 +26,14 @@
* OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
* OR MODIFICATIONS.
*
- * $FreeBSD: stable/9/sys/net/ppp_defs.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/ppp_defs.h 250887 2013-05-21 21:20:10Z ed $
*/
#ifndef _PPP_DEFS_H_
#define _PPP_DEFS_H_
+#include <sys/_types.h>
+
/*
* The basic PPP frame.
*/
@@ -84,7 +86,7 @@
/*
* Extended asyncmap - allows any character to be escaped.
*/
-typedef u_int32_t ext_accm[8];
+typedef __uint32_t ext_accm[8];
/*
* What to do with network protocol (NP) packets.
@@ -144,8 +146,8 @@
* the last NP packet was sent or received.
*/
struct ppp_idle {
- time_t xmit_idle; /* time since last NP packet sent */
- time_t recv_idle; /* time since last NP packet received */
+ __time_t xmit_idle; /* time since last NP packet sent */
+ __time_t recv_idle; /* time since last NP packet received */
};
#ifndef __P
Modified: trunk/sys/net/radix.c
===================================================================
--- trunk/sys/net/radix.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)radix.c 8.5 (Berkeley) 5/19/95
- * $FreeBSD: stable/9/sys/net/radix.c 210122 2010-07-15 14:41:59Z luigi $
+ * $FreeBSD: stable/10/sys/net/radix.c 273185 2014-10-16 20:46:02Z glebius $
*/
/*
@@ -67,28 +67,28 @@
*rn_search(void *, struct radix_node *),
*rn_search_m(void *, struct radix_node *, void *);
-static int max_keylen;
-static struct radix_mask *rn_mkfreelist;
-static struct radix_node_head *mask_rnhead;
+static void rn_detachhead_internal(void **head);
+static int rn_inithead_internal(void **head, int off);
+
+#define RADIX_MAX_KEY_LEN 32
+
+static char rn_zeros[RADIX_MAX_KEY_LEN];
+static char rn_ones[RADIX_MAX_KEY_LEN] = {
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
/*
- * Work area -- the following point to 3 buffers of size max_keylen,
- * allocated in this order in a block of memory malloc'ed by rn_init.
- * rn_zeros, rn_ones are set in rn_init and used in readonly afterwards.
- * addmask_key is used in rn_addmask in rw mode and not thread-safe.
+ * XXX: Compat stuff for old rn_addmask() users
*/
-static char *rn_zeros, *rn_ones, *addmask_key;
+static struct radix_node_head *mask_rnhead_compat;
+#ifdef _KERNEL
+static struct mtx mask_mtx;
+#endif
-#define MKGet(m) { \
- if (rn_mkfreelist) { \
- m = rn_mkfreelist; \
- rn_mkfreelist = (m)->rm_mklist; \
- } else \
- R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); }
-
-#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
-#define rn_masktop (mask_rnhead->rnh_treetop)
-
static int rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
rn_new_radix_mask(struct radix_node *tt,
@@ -157,12 +157,10 @@
* Search a node in the tree matching the key.
*/
static struct radix_node *
-rn_search(v_arg, head)
- void *v_arg;
- struct radix_node *head;
+rn_search(void *v_arg, struct radix_node *head)
{
- register struct radix_node *x;
- register caddr_t v;
+ struct radix_node *x;
+ caddr_t v;
for (x = head, v = v_arg; x->rn_bit >= 0;) {
if (x->rn_bmask & v[x->rn_offset])
@@ -178,12 +176,10 @@
* XXX note this function is used only once.
*/
static struct radix_node *
-rn_search_m(v_arg, head, m_arg)
- struct radix_node *head;
- void *v_arg, *m_arg;
+rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
{
- register struct radix_node *x;
- register caddr_t v = v_arg, m = m_arg;
+ struct radix_node *x;
+ caddr_t v = v_arg, m = m_arg;
for (x = head; x->rn_bit >= 0;) {
if ((x->rn_bmask & m[x->rn_offset]) &&
@@ -192,15 +188,14 @@
else
x = x->rn_left;
}
- return x;
+ return (x);
}
int
-rn_refines(m_arg, n_arg)
- void *m_arg, *n_arg;
+rn_refines(void *m_arg, void *n_arg)
{
- register caddr_t m = m_arg, n = n_arg;
- register caddr_t lim, lim2 = lim = n + LEN(n);
+ caddr_t m = m_arg, n = n_arg;
+ caddr_t lim, lim2 = lim = n + LEN(n);
int longer = LEN(n++) - LEN(m++);
int masks_are_equal = 1;
@@ -208,49 +203,71 @@
lim -= longer;
while (n < lim) {
if (*n & ~(*m))
- return 0;
+ return (0);
if (*n++ != *m++)
masks_are_equal = 0;
}
while (n < lim2)
if (*n++)
- return 0;
+ return (0);
if (masks_are_equal && (longer < 0))
for (lim2 = m - longer; m < lim2; )
if (*m++)
- return 1;
+ return (1);
return (!masks_are_equal);
}
+/*
+ * Search for exact match in given @head.
+ * Assume host bits are cleared in @v_arg if @m_arg is not NULL
+ * Note that prefixes with /32 or /128 masks are treated differently
+ * from host routes.
+ */
struct radix_node *
-rn_lookup(v_arg, m_arg, head)
- void *v_arg, *m_arg;
- struct radix_node_head *head;
+rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
- register struct radix_node *x;
- caddr_t netmask = 0;
+ struct radix_node *x;
+ caddr_t netmask;
- if (m_arg) {
- x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
- if (x == 0)
- return (0);
+ if (m_arg != NULL) {
+ /*
+ * Most common case: search exact prefix/mask
+ */
+ x = rn_addmask_r(m_arg, head->rnh_masks, 1,
+ head->rnh_treetop->rn_offset);
+ if (x == NULL)
+ return (NULL);
netmask = x->rn_key;
- }
- x = rn_match(v_arg, head);
- if (x && netmask) {
- while (x && x->rn_mask != netmask)
+
+ x = rn_match(v_arg, head);
+
+ while (x != NULL && x->rn_mask != netmask)
x = x->rn_dupedkey;
+
+ return (x);
}
- return x;
+
+ /*
+ * Search for host address.
+ */
+ if ((x = rn_match(v_arg, head)) == NULL)
+ return (NULL);
+
+ /* Check if found key is the same */
+ if (LEN(x->rn_key) != LEN(v_arg) || bcmp(x->rn_key, v_arg, LEN(v_arg)))
+ return (NULL);
+
+ /* Check if this is not host route */
+ if (x->rn_mask != NULL)
+ return (NULL);
+
+ return (x);
}
static int
-rn_satisfies_leaf(trial, leaf, skip)
- char *trial;
- register struct radix_node *leaf;
- int skip;
+rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip)
{
- register char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
+ char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
char *cplim;
int length = min(LEN(cp), LEN(cp2));
@@ -261,22 +278,23 @@
cplim = cp + length; cp3 += skip; cp2 += skip;
for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
if ((*cp ^ *cp2) & *cp3)
- return 0;
- return 1;
+ return (0);
+ return (1);
}
+/*
+ * Search for longest-prefix match in given @head
+ */
struct radix_node *
-rn_match(v_arg, head)
- void *v_arg;
- struct radix_node_head *head;
+rn_match(void *v_arg, struct radix_node_head *head)
{
caddr_t v = v_arg;
- register struct radix_node *t = head->rnh_treetop, *x;
- register caddr_t cp = v, cp2;
+ struct radix_node *t = head->rnh_treetop, *x;
+ caddr_t cp = v, cp2;
caddr_t cplim;
struct radix_node *saved_t, *top = t;
int off = t->rn_offset, vlen = LEN(cp), matched_off;
- register int test, b, rn_bit;
+ int test, b, rn_bit;
/*
* Open code rn_search(v, top) to avoid overhead of extra
@@ -314,7 +332,7 @@
*/
if (t->rn_flags & RNF_ROOT)
t = t->rn_dupedkey;
- return t;
+ return (t);
on1:
test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
for (b = 7; (test >>= 1) > 0;)
@@ -335,13 +353,13 @@
*/
if (t->rn_flags & RNF_NORMAL) {
if (rn_bit <= t->rn_bit)
- return t;
+ return (t);
} else if (rn_satisfies_leaf(v, t, matched_off))
- return t;
+ return (t);
t = saved_t;
/* start searching up the tree */
do {
- register struct radix_mask *m;
+ struct radix_mask *m;
t = t->rn_parent;
m = t->rn_mklist;
/*
@@ -360,12 +378,12 @@
while (x && x->rn_mask != m->rm_mask)
x = x->rn_dupedkey;
if (x && rn_satisfies_leaf(v, x, off))
- return x;
+ return (x);
}
m = m->rm_mklist;
}
} while (t != top);
- return 0;
+ return (0);
}
#ifdef RN_DEBUG
@@ -387,12 +405,9 @@
*/
static struct radix_node *
-rn_newpair(v, b, nodes)
- void *v;
- int b;
- struct radix_node nodes[2];
+rn_newpair(void *v, int b, struct radix_node nodes[2])
{
- register struct radix_node *tt = nodes, *t = tt + 1;
+ struct radix_node *tt = nodes, *t = tt + 1;
t->rn_bit = b;
t->rn_bmask = 0x80 >> (b & 7);
t->rn_left = tt;
@@ -416,29 +431,25 @@
tt->rn_ybro = rn_clist;
rn_clist = tt;
#endif
- return t;
+ return (t);
}
static struct radix_node *
-rn_insert(v_arg, head, dupentry, nodes)
- void *v_arg;
- struct radix_node_head *head;
- int *dupentry;
- struct radix_node nodes[2];
+rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
+ struct radix_node nodes[2])
{
caddr_t v = v_arg;
struct radix_node *top = head->rnh_treetop;
int head_off = top->rn_offset, vlen = LEN(v);
- register struct radix_node *t = rn_search(v_arg, top);
- register caddr_t cp = v + head_off;
- register int b;
- struct radix_node *tt;
+ struct radix_node *t = rn_search(v_arg, top);
+ caddr_t cp = v + head_off;
+ int b;
+ struct radix_node *p, *tt, *x;
/*
* Find first bit at which v and t->rn_key differ
*/
- {
- register caddr_t cp2 = t->rn_key + head_off;
- register int cmp_res;
+ caddr_t cp2 = t->rn_key + head_off;
+ int cmp_res;
caddr_t cplim = v + vlen;
while (cp < cplim)
@@ -445,15 +456,14 @@
if (*cp2++ != *cp++)
goto on1;
*dupentry = 1;
- return t;
+ return (t);
on1:
*dupentry = 0;
cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
for (b = (cp - v) << 3; cmp_res; b--)
cmp_res >>= 1;
- }
- {
- register struct radix_node *p, *x = top;
+
+ x = top;
cp = v;
do {
p = x;
@@ -485,33 +495,31 @@
if (rn_debug)
log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
#endif
- }
return (tt);
}
struct radix_node *
-rn_addmask(n_arg, search, skip)
- int search, skip;
- void *n_arg;
+rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
{
- caddr_t netmask = (caddr_t)n_arg;
- register struct radix_node *x;
- register caddr_t cp, cplim;
- register int b = 0, mlen, j;
- int maskduplicated, m0, isnormal;
+ unsigned char *netmask = arg;
+ unsigned char *cp, *cplim;
+ struct radix_node *x;
+ int b = 0, mlen, j;
+ int maskduplicated, isnormal;
struct radix_node *saved_x;
- static int last_zeroed = 0;
+ unsigned char addmask_key[RADIX_MAX_KEY_LEN];
- if ((mlen = LEN(netmask)) > max_keylen)
- mlen = max_keylen;
+ if ((mlen = LEN(netmask)) > RADIX_MAX_KEY_LEN)
+ mlen = RADIX_MAX_KEY_LEN;
if (skip == 0)
skip = 1;
if (mlen <= skip)
- return (mask_rnhead->rnh_nodes);
+ return (maskhead->rnh_nodes);
+
+ bzero(addmask_key, RADIX_MAX_KEY_LEN);
if (skip > 1)
bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
- if ((m0 = mlen) > skip)
- bcopy(netmask + skip, addmask_key + skip, mlen - skip);
+ bcopy(netmask + skip, addmask_key + skip, mlen - skip);
/*
* Trim trailing zeroes.
*/
@@ -518,25 +526,20 @@
for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
cp--;
mlen = cp - addmask_key;
- if (mlen <= skip) {
- if (m0 >= last_zeroed)
- last_zeroed = mlen;
- return (mask_rnhead->rnh_nodes);
- }
- if (m0 < last_zeroed)
- bzero(addmask_key + m0, last_zeroed - m0);
- *addmask_key = last_zeroed = mlen;
- x = rn_search(addmask_key, rn_masktop);
+ if (mlen <= skip)
+ return (maskhead->rnh_nodes);
+ *addmask_key = mlen;
+ x = rn_search(addmask_key, maskhead->rnh_treetop);
if (bcmp(addmask_key, x->rn_key, mlen) != 0)
x = 0;
if (x || search)
return (x);
- R_Zalloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
+ R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x));
if ((saved_x = x) == 0)
return (0);
netmask = cp = (caddr_t)(x + 2);
bcopy(addmask_key, cp, mlen);
- x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
+ x = rn_insert(cp, maskhead, &maskduplicated, x);
if (maskduplicated) {
log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
Free(saved_x);
@@ -546,20 +549,18 @@
* Calculate index of mask, and check for normalcy.
* First find the first byte with a 0 bit, then if there are
* more bits left (remember we already trimmed the trailing 0's),
- * the pattern must be one of those in normal_chars[], or we have
+ * the bits should be contiguous, otherwise we have got
* a non-contiguous mask.
*/
+#define CONTIG(_c) (((~(_c) + 1) & (_c)) == (unsigned char)(~(_c) + 1))
cplim = netmask + mlen;
isnormal = 1;
for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
cp++;
if (cp != cplim) {
- static char normal_chars[] = {
- 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
-
for (j = 0x80; (j & *cp) != 0; j >>= 1)
b++;
- if (*cp != normal_chars[b] || cp != (cplim - 1))
+ if (!CONTIG(*cp) || cp != (cplim - 1))
isnormal = 0;
}
b += (cp - netmask) << 3;
@@ -569,34 +570,48 @@
return (x);
}
+struct radix_node *
+rn_addmask(void *n_arg, int search, int skip)
+{
+	struct radix_node *tt;
+
+#ifdef _KERNEL
+	mtx_lock(&mask_mtx);
+#endif
+	tt = rn_addmask_r(n_arg, mask_rnhead_compat, search, skip);
+
+#ifdef _KERNEL
+	mtx_unlock(&mask_mtx);
+#endif
+
+	return (tt);
+}
+
static int /* XXX: arbitrary ordering for non-contiguous masks */
-rn_lexobetter(m_arg, n_arg)
- void *m_arg, *n_arg;
+rn_lexobetter(void *m_arg, void *n_arg)
{
- register u_char *mp = m_arg, *np = n_arg, *lim;
+ u_char *mp = m_arg, *np = n_arg, *lim;
if (LEN(mp) > LEN(np))
- return 1; /* not really, but need to check longer one first */
+ return (1); /* not really, but need to check longer one first */
if (LEN(mp) == LEN(np))
for (lim = mp + LEN(mp); mp < lim;)
if (*mp++ > *np++)
- return 1;
- return 0;
+ return (1);
+ return (0);
}
static struct radix_mask *
-rn_new_radix_mask(tt, next)
- register struct radix_node *tt;
- register struct radix_mask *next;
+rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
{
- register struct radix_mask *m;
+ struct radix_mask *m;
- MKGet(m);
- if (m == 0) {
- log(LOG_ERR, "Mask for route not entered\n");
+ R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask));
+ if (m == NULL) {
+ log(LOG_ERR, "Failed to allocate route mask\n");
return (0);
}
- bzero(m, sizeof *m);
+ bzero(m, sizeof(*m));
m->rm_bit = tt->rn_bit;
m->rm_flags = tt->rn_flags;
if (tt->rn_flags & RNF_NORMAL)
@@ -605,17 +620,15 @@
m->rm_mask = tt->rn_mask;
m->rm_mklist = next;
tt->rn_mklist = m;
- return m;
+ return (m);
}
struct radix_node *
-rn_addroute(v_arg, n_arg, head, treenodes)
- void *v_arg, *n_arg;
- struct radix_node_head *head;
- struct radix_node treenodes[2];
+rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+ struct radix_node treenodes[2])
{
caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
- register struct radix_node *t, *x = 0, *tt;
+ struct radix_node *t, *x = 0, *tt;
struct radix_node *saved_tt, *top = head->rnh_treetop;
short b = 0, b_leaf = 0;
int keyduplicated;
@@ -630,7 +643,8 @@
* nodes and possibly save time in calculating indices.
*/
if (netmask) {
- if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
+ x = rn_addmask_r(netmask, head->rnh_masks, 0, top->rn_offset);
+ if (x == NULL)
return (0);
b_leaf = x->rn_bit;
b = -1 - x->rn_bit;
@@ -742,7 +756,7 @@
on2:
/* Add new route to highest possible ancestor's list */
if ((netmask == 0) || (b > t->rn_bit ))
- return tt; /* can't lift at all */
+ return (tt); /* can't lift at all */
b_leaf = tt->rn_bit;
do {
x = t;
@@ -766,7 +780,7 @@
log(LOG_ERR,
"Non-unique normal route, mask not entered\n");
#endif
- return tt;
+ return (tt);
}
} else
mmask = m->rm_mask;
@@ -773,7 +787,7 @@
if (mmask == netmask) {
m->rm_refs++;
tt->rn_mklist = m;
- return tt;
+ return (tt);
}
if (rn_refines(netmask, mmask)
|| rn_lexobetter(netmask, mmask))
@@ -780,15 +794,13 @@
break;
}
*mp = rn_new_radix_mask(tt, *mp);
- return tt;
+ return (tt);
}
struct radix_node *
-rn_delete(v_arg, netmask_arg, head)
- void *v_arg, *netmask_arg;
- struct radix_node_head *head;
+rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
{
- register struct radix_node *t, *p, *x, *tt;
+ struct radix_node *t, *p, *x, *tt;
struct radix_mask *m, *saved_m, **mp;
struct radix_node *dupedkey, *saved_tt, *top;
caddr_t v, netmask;
@@ -809,7 +821,8 @@
* Delete our route from mask lists.
*/
if (netmask) {
- if ((x = rn_addmask(netmask, 1, head_off)) == 0)
+ x = rn_addmask_r(netmask, head->rnh_masks, 1, head_off);
+ if (x == NULL)
return (0);
netmask = x->rn_key;
while (tt->rn_mask != netmask)
@@ -821,7 +834,7 @@
if (tt->rn_flags & RNF_NORMAL) {
if (m->rm_leaf != tt || m->rm_refs > 0) {
log(LOG_ERR, "rn_delete: inconsistent annotation\n");
- return 0; /* dangling ref could cause disaster */
+ return (0); /* dangling ref could cause disaster */
}
} else {
if (m->rm_mask != tt->rn_mask) {
@@ -842,7 +855,7 @@
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
if (m == saved_m) {
*mp = m->rm_mklist;
- MKFree(m);
+ Free(m);
break;
}
if (m == 0) {
@@ -933,7 +946,7 @@
struct radix_mask *mm = m->rm_mklist;
x->rn_mklist = 0;
if (--(m->rm_refs) < 0)
- MKFree(m);
+ Free(m);
m = mm;
}
if (m)
@@ -973,17 +986,14 @@
* exit.
*/
static int
-rn_walktree_from(h, a, m, f, w)
- struct radix_node_head *h;
- void *a, *m;
- walktree_f_t *f;
- void *w;
+rn_walktree_from(struct radix_node_head *h, void *a, void *m,
+ walktree_f_t *f, void *w)
{
int error;
struct radix_node *base, *next;
u_char *xa = (u_char *)a;
u_char *xm = (u_char *)m;
- register struct radix_node *rn, *last = 0 /* shut up gcc */;
+ struct radix_node *rn, *last = NULL; /* shut up gcc */
int stopping = 0;
int lastb;
@@ -1076,18 +1086,15 @@
}
}
- return 0;
+ return (0);
}
static int
-rn_walktree(h, f, w)
- struct radix_node_head *h;
- walktree_f_t *f;
- void *w;
+rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
{
int error;
struct radix_node *base, *next;
- register struct radix_node *rn = h->rnh_treetop;
+ struct radix_node *rn = h->rnh_treetop;
/*
* This gets complicated because we may delete the node
* while applying the function f to it, so we need to calculate
@@ -1129,13 +1136,11 @@
* bits starting at 'off'.
* Return 1 on success, 0 on error.
*/
-int
-rn_inithead(head, off)
- void **head;
- int off;
+static int
+rn_inithead_internal(void **head, int off)
{
- register struct radix_node_head *rnh;
- register struct radix_node *t, *tt, *ttt;
+ struct radix_node_head *rnh;
+ struct radix_node *t, *tt, *ttt;
if (*head)
return (1);
R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
@@ -1164,8 +1169,8 @@
return (1);
}
-int
-rn_detachhead(void **head)
+static void
+rn_detachhead_internal(void **head)
{
struct radix_node_head *rnh;
@@ -1177,28 +1182,73 @@
Free(rnh);
*head = NULL;
+}
+
+int
+rn_inithead(void **head, int off)
+{
+ struct radix_node_head *rnh;
+
+ if (*head != NULL)
+ return (1);
+
+ if (rn_inithead_internal(head, off) == 0)
+ return (0);
+
+ rnh = (struct radix_node_head *)(*head);
+
+ if (rn_inithead_internal((void **)&rnh->rnh_masks, 0) == 0) {
+ rn_detachhead_internal(head);
+ return (0);
+ }
+
return (1);
}
+static int
+rn_freeentry(struct radix_node *rn, void *arg)
+{
+ struct radix_node_head * const rnh = arg;
+ struct radix_node *x;
+
+ x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
+ if (x != NULL)
+ Free(x);
+ return (0);
+}
+
+int
+rn_detachhead(void **head)
+{
+ struct radix_node_head *rnh;
+
+ KASSERT((head != NULL && *head != NULL),
+ ("%s: head already freed", __func__));
+
+ rnh = *head;
+
+ rn_walktree(rnh->rnh_masks, rn_freeentry, rnh->rnh_masks);
+ rn_detachhead_internal((void **)&rnh->rnh_masks);
+ rn_detachhead_internal(head);
+ return (1);
+}
+
void
rn_init(int maxk)
{
- char *cp, *cplim;
-
- max_keylen = maxk;
- if (max_keylen == 0) {
+ if ((maxk <= 0) || (maxk > RADIX_MAX_KEY_LEN)) {
log(LOG_ERR,
- "rn_init: radix functions require max_keylen be set\n");
+ "rn_init: max_keylen must be within 1..%d\n",
+ RADIX_MAX_KEY_LEN);
return;
}
- R_Malloc(rn_zeros, char *, 3 * max_keylen);
- if (rn_zeros == NULL)
- panic("rn_init");
- bzero(rn_zeros, 3 * max_keylen);
- rn_ones = cp = rn_zeros + max_keylen;
- addmask_key = cplim = rn_ones + max_keylen;
- while (cp < cplim)
- *cp++ = -1;
- if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
+
+ /*
+ * XXX: Compat for old rn_addmask() users
+ */
+ if (rn_inithead((void **)(void *)&mask_rnhead_compat, 0) == 0)
panic("rn_init 2");
+#ifdef _KERNEL
+ mtx_init(&mask_mtx, "radix_mask", NULL, MTX_DEF);
+#endif
}
Modified: trunk/sys/net/radix.h
===================================================================
--- trunk/sys/net/radix.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)radix.h 8.2 (Berkeley) 10/31/94
- * $FreeBSD: stable/9/sys/net/radix.h 225698 2011-09-20 20:27:26Z kmacy $
+ * $FreeBSD: stable/10/sys/net/radix.h 265708 2014-05-08 20:27:06Z melifaro $
*/
#ifndef _RADIX_H_
@@ -120,9 +120,9 @@
(void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
(void *v, void *mask, struct radix_node_head *head);
- struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
+ struct radix_node *(*rnh_matchaddr) /* longest match for sockaddr */
(void *v, struct radix_node_head *head);
- struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
+	struct radix_node *(*rnh_lookup)	/* exact match for sockaddr */
(void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
(void *v, struct radix_node_head *head);
@@ -137,6 +137,7 @@
#ifdef _KERNEL
struct rwlock rnh_lock; /* locks entire radix tree */
#endif
+ struct radix_node_head *rnh_masks; /* Storage for our masks */
};
#ifndef _KERNEL
@@ -168,6 +169,7 @@
int rn_refines(void *, void *);
struct radix_node
*rn_addmask(void *, int, int),
+ *rn_addmask_r(void *, struct radix_node_head *, int, int),
*rn_addroute (void *, void *, struct radix_node_head *,
struct radix_node [2]),
*rn_delete(void *, void *, struct radix_node_head *),
Modified: trunk/sys/net/radix_mpath.c
===================================================================
--- trunk/sys/net/radix_mpath.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix_mpath.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/radix_mpath.c 225163 2011-08-25 04:31:20Z qingli $");
+__FBSDID("$FreeBSD: stable/10/sys/net/radix_mpath.c 265711 2014-05-08 20:41:39Z melifaro $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -86,7 +86,7 @@
while (rn != NULL) {
rt = (struct rtentry *)rn;
- i += rt->rt_rmx.rmx_weight;
+ i += rt->rt_weight;
rn = rn_mpath_next(rn);
}
return (i);
@@ -113,11 +113,16 @@
if (rt->rt_gateway->sa_family == AF_LINK) {
if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len))
break;
- } else {
- if (rt->rt_gateway->sa_len == gate->sa_len &&
- !memcmp(rt->rt_gateway, gate, gate->sa_len))
- break;
}
+
+	/*
+	 * Check for other options:
+	 * 1) Routes with 'real' IPv4/IPv6 gateway
+	 * 2) Loopback host routes (another AF_LINK/sockaddr_dl check)
+	 */
+ if (rt->rt_gateway->sa_len == gate->sa_len &&
+ !memcmp(rt->rt_gateway, gate, gate->sa_len))
+ break;
} while ((rn = rn_mpath_next(rn)) != NULL);
return (struct rtentry *)rn;
@@ -152,6 +157,7 @@
/*
* check if we have the same key/mask/gateway on the table already.
+ * Assume @rt rt_key host bits are cleared according to @netmask
*/
int
rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
@@ -159,76 +165,13 @@
{
struct radix_node *rn, *rn1;
struct rtentry *rt1;
- char *p, *q, *eq;
- int same, l, skip;
rn = (struct radix_node *)rt;
rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
if (!rn1 || rn1->rn_flags & RNF_ROOT)
- return 0;
+ return (0);
- /*
- * unlike other functions we have in this file, we have to check
- * all key/mask/gateway as rnh_lookup can match less specific entry.
- */
- rt1 = (struct rtentry *)rn1;
-
- /* compare key. */
- if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
- bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
- goto different;
-
- /* key was the same. compare netmask. hairy... */
- if (rt_mask(rt1) && netmask) {
- skip = rnh->rnh_treetop->rn_offset;
- if (rt_mask(rt1)->sa_len > netmask->sa_len) {
- /*
- * as rt_mask(rt1) is made optimal by radix.c,
- * there must be some 1-bits on rt_mask(rt1)
- * after netmask->sa_len. therefore, in
- * this case, the entries are different.
- */
- if (rt_mask(rt1)->sa_len > skip)
- goto different;
- else {
- /* no bits to compare, i.e. same*/
- goto maskmatched;
- }
- }
-
- l = rt_mask(rt1)->sa_len;
- if (skip > l) {
- /* no bits to compare, i.e. same */
- goto maskmatched;
- }
- p = (char *)rt_mask(rt1);
- q = (char *)netmask;
- if (bcmp(p + skip, q + skip, l - skip))
- goto different;
- /*
- * need to go through all the bit, as netmask is not
- * optimal and can contain trailing 0s
- */
- eq = (char *)netmask + netmask->sa_len;
- q += l;
- same = 1;
- while (eq > q)
- if (*q++) {
- same = 0;
- break;
- }
- if (!same)
- goto different;
- } else if (!rt_mask(rt1) && !netmask)
- ; /* no mask to compare, i.e. same */
- else {
- /* one has mask and the other does not, different */
- goto different;
- }
-
-maskmatched:
-
- /* key/mask were the same. compare gateway for all multipaths */
+ /* key/mask are the same. compare gateway for all multipaths */
do {
rt1 = (struct rtentry *)rn1;
@@ -249,11 +192,10 @@
}
/* all key/mask/gateway are the same. conflicting entry. */
- return EEXIST;
+ return (EEXIST);
} while ((rn1 = rn_mpath_next(rn1)) != NULL);
-different:
- return 0;
+ return (0);
}
void
@@ -289,8 +231,8 @@
hash += hashjitter;
hash %= n;
for (weight = abs((int32_t)hash), rt = ro->ro_rt;
- weight >= rt->rt_rmx.rmx_weight && rn;
- weight -= rt->rt_rmx.rmx_weight) {
+ weight >= rt->rt_weight && rn;
+ weight -= rt->rt_weight) {
/* stay within the multipath routes */
if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
Modified: trunk/sys/net/radix_mpath.h
===================================================================
--- trunk/sys/net/radix_mpath.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/radix_mpath.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -33,7 +33,7 @@
* BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
* PROPERTIES.
*/
-/* $FreeBSD: stable/9/sys/net/radix_mpath.h 179426 2008-05-30 09:34:35Z qingli $ */
+/* $FreeBSD: stable/10/sys/net/radix_mpath.h 179426 2008-05-30 09:34:35Z qingli $ */
#ifndef _NET_RADIX_MPATH_H_
#define _NET_RADIX_MPATH_H_
Modified: trunk/sys/net/raw_cb.c
===================================================================
--- trunk/sys/net/raw_cb.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/raw_cb.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)raw_cb.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/raw_cb.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/raw_cb.c 227309 2011-11-07 15:43:11Z ed $
*/
#include <sys/param.h>
Modified: trunk/sys/net/raw_cb.h
===================================================================
--- trunk/sys/net/raw_cb.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/raw_cb.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)raw_cb.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/raw_cb.h 225970 2011-10-04 11:35:18Z bz $
+ * $FreeBSD: stable/10/sys/net/raw_cb.h 225837 2011-09-28 13:48:36Z bz $
*/
#ifndef _NET_RAW_CB_H_
Modified: trunk/sys/net/raw_usrreq.c
===================================================================
--- trunk/sys/net/raw_usrreq.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/raw_usrreq.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)raw_usrreq.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/net/raw_usrreq.c 225970 2011-10-04 11:35:18Z bz $
+ * $FreeBSD: stable/10/sys/net/raw_usrreq.c 225837 2011-09-28 13:48:36Z bz $
*/
#include <sys/param.h>
Added: trunk/sys/net/rndis.h
===================================================================
--- trunk/sys/net/rndis.h (rev 0)
+++ trunk/sys/net/rndis.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,384 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/10/sys/net/rndis.h 321760 2017-07-31 06:40:09Z sephe $ */
+/* $OpenBSD: if_urndisreg.h,v 1.19 2013/11/21 14:08:05 mpi Exp $ */
+
+/*
+ * Copyright (c) 2010 Jonathan Armani <armani at openbsd.org>
+ * Copyright (c) 2010 Fabien Romano <fabien at openbsd.org>
+ * Copyright (c) 2010 Michael Knudsen <mk at openbsd.org>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _NET_RNDIS_H_
+#define _NET_RNDIS_H_
+
+/* Canonical major/minor version as of 22th Aug. 2016. */
+#define RNDIS_VERSION_MAJOR 0x00000001
+#define RNDIS_VERSION_MINOR 0x00000000
+
+#define RNDIS_STATUS_SUCCESS 0x00000000L
+#define RNDIS_STATUS_PENDING 0x00000103L
+#define RNDIS_STATUS_MEDIA_CONNECT 0x4001000BL
+#define RNDIS_STATUS_MEDIA_DISCONNECT 0x4001000CL
+#define RNDIS_STATUS_LINK_SPEED_CHANGE 0x40010013L
+#define RNDIS_STATUS_NETWORK_CHANGE 0x40010018L
+#define RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG 0x40020006L
+#define RNDIS_STATUS_BUFFER_OVERFLOW 0x80000005L
+#define RNDIS_STATUS_FAILURE 0xC0000001L
+#define RNDIS_STATUS_NOT_SUPPORTED 0xC00000BBL
+#define RNDIS_STATUS_RESOURCES 0xC000009AL
+#define RNDIS_STATUS_INVALID_DATA 0xC0010015L
+
+#define OID_GEN_SUPPORTED_LIST 0x00010101
+#define OID_GEN_HARDWARE_STATUS 0x00010102
+#define OID_GEN_MEDIA_SUPPORTED 0x00010103
+#define OID_GEN_MEDIA_IN_USE 0x00010104
+#define OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105
+#define OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106
+#define OID_GEN_LINK_SPEED 0x00010107
+#define OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108
+#define OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109
+#define OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A
+#define OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B
+#define OID_GEN_VENDOR_ID 0x0001010C
+#define OID_GEN_VENDOR_DESCRIPTION 0x0001010D
+#define OID_GEN_CURRENT_PACKET_FILTER 0x0001010E
+#define OID_GEN_CURRENT_LOOKAHEAD 0x0001010F
+#define OID_GEN_DRIVER_VERSION 0x00010110
+#define OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111
+#define OID_GEN_PROTOCOL_OPTIONS 0x00010112
+#define OID_GEN_MAC_OPTIONS 0x00010113
+#define OID_GEN_MEDIA_CONNECT_STATUS 0x00010114
+#define OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115
+#define OID_GEN_VENDOR_DRIVER_VERSION 0x00010116
+#define OID_GEN_SUPPORTED_GUIDS 0x00010117
+#define OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118
+#define OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119
+#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203
+#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204
+#define OID_GEN_MACHINE_NAME 0x0001021A
+#define OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
+#define OID_GEN_VLAN_ID 0x0001021C
+
+#define OID_802_3_PERMANENT_ADDRESS 0x01010101
+#define OID_802_3_CURRENT_ADDRESS 0x01010102
+#define OID_802_3_MULTICAST_LIST 0x01010103
+#define OID_802_3_MAXIMUM_LIST_SIZE 0x01010104
+#define OID_802_3_MAC_OPTIONS 0x01010105
+#define OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101
+#define OID_802_3_XMIT_ONE_COLLISION 0x01020102
+#define OID_802_3_XMIT_MORE_COLLISIONS 0x01020103
+#define OID_802_3_XMIT_DEFERRED 0x01020201
+#define OID_802_3_XMIT_MAX_COLLISIONS 0x01020202
+#define OID_802_3_RCV_OVERRUN 0x01020203
+#define OID_802_3_XMIT_UNDERRUN 0x01020204
+#define OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205
+#define OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206
+#define OID_802_3_XMIT_LATE_COLLISIONS 0x01020207
+
+#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C
+#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D
+
+#define RNDIS_MEDIUM_802_3 0x00000000
+
+/* Device flags */
+#define RNDIS_DF_CONNECTIONLESS 0x00000001
+#define RNDIS_DF_CONNECTION_ORIENTED 0x00000002
+
+/*
+ * Common RNDIS message header.
+ */
+struct rndis_msghdr {
+ uint32_t rm_type;
+ uint32_t rm_len;
+};
+
+/*
+ * RNDIS data message
+ */
+#define REMOTE_NDIS_PACKET_MSG 0x00000001
+
+struct rndis_packet_msg {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_dataoffset;
+ uint32_t rm_datalen;
+ uint32_t rm_oobdataoffset;
+ uint32_t rm_oobdatalen;
+ uint32_t rm_oobdataelements;
+ uint32_t rm_pktinfooffset;
+ uint32_t rm_pktinfolen;
+ uint32_t rm_vchandle;
+ uint32_t rm_reserved;
+};
+
+/*
+ * Minimum value for rm_dataoffset, rm_oobdataoffset, and
+ * rm_pktinfooffset.
+ */
+#define RNDIS_PACKET_MSG_OFFSET_MIN \
+ (sizeof(struct rndis_packet_msg) - \
+ __offsetof(struct rndis_packet_msg, rm_dataoffset))
+
+/* Offset from the beginning of rndis_packet_msg. */
+#define RNDIS_PACKET_MSG_OFFSET_ABS(ofs) \
+ ((ofs) + __offsetof(struct rndis_packet_msg, rm_dataoffset))
+
+#define RNDIS_PACKET_MSG_OFFSET_ALIGN 4
+#define RNDIS_PACKET_MSG_OFFSET_ALIGNMASK \
+ (RNDIS_PACKET_MSG_OFFSET_ALIGN - 1)
+
+/* Per-packet-info for RNDIS data message */
+struct rndis_pktinfo {
+ uint32_t rm_size;
+ uint32_t rm_type; /* NDIS_PKTINFO_TYPE_ */
+ uint32_t rm_pktinfooffset;
+ uint8_t rm_data[];
+};
+
+#define RNDIS_PKTINFO_OFFSET \
+ __offsetof(struct rndis_pktinfo, rm_data[0])
+#define RNDIS_PKTINFO_SIZE_ALIGN 4
+#define RNDIS_PKTINFO_SIZE_ALIGNMASK (RNDIS_PKTINFO_SIZE_ALIGN - 1)
+
+#define NDIS_PKTINFO_TYPE_CSUM 0
+#define NDIS_PKTINFO_TYPE_IPSEC 1
+#define NDIS_PKTINFO_TYPE_LSO 2
+#define NDIS_PKTINFO_TYPE_CLASSIFY 3
+/* reserved 4 */
+#define NDIS_PKTINFO_TYPE_SGLIST 5
+#define NDIS_PKTINFO_TYPE_VLAN 6
+#define NDIS_PKTINFO_TYPE_ORIG 7
+#define NDIS_PKTINFO_TYPE_PKT_CANCELID 8
+#define NDIS_PKTINFO_TYPE_ORIG_NBLIST 9
+#define NDIS_PKTINFO_TYPE_CACHE_NBLIST 10
+#define NDIS_PKTINFO_TYPE_PKT_PAD 11
+
+/*
+ * RNDIS control messages
+ */
+
+/*
+ * Common header for RNDIS completion messages.
+ *
+ * NOTE: It does not apply to REMOTE_NDIS_RESET_CMPLT.
+ */
+struct rndis_comp_hdr {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_status;
+};
+
+/* Initialize the device. */
+#define REMOTE_NDIS_INITIALIZE_MSG 0x00000002
+#define REMOTE_NDIS_INITIALIZE_CMPLT 0x80000002
+
+struct rndis_init_req {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_ver_major;
+ uint32_t rm_ver_minor;
+ uint32_t rm_max_xfersz;
+};
+
+struct rndis_init_comp {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_status;
+ uint32_t rm_ver_major;
+ uint32_t rm_ver_minor;
+ uint32_t rm_devflags;
+ uint32_t rm_medium;
+ uint32_t rm_pktmaxcnt;
+ uint32_t rm_pktmaxsz;
+ uint32_t rm_align;
+ uint32_t rm_aflistoffset;
+ uint32_t rm_aflistsz;
+};
+
+#define RNDIS_INIT_COMP_SIZE_MIN \
+ __offsetof(struct rndis_init_comp, rm_aflistsz)
+
+/* Halt the device. No response sent. */
+#define REMOTE_NDIS_HALT_MSG 0x00000003
+
+struct rndis_halt_req {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+};
+
+/* Send a query object. */
+#define REMOTE_NDIS_QUERY_MSG 0x00000004
+#define REMOTE_NDIS_QUERY_CMPLT 0x80000004
+
+struct rndis_query_req {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_oid;
+ uint32_t rm_infobuflen;
+ uint32_t rm_infobufoffset;
+ uint32_t rm_devicevchdl;
+};
+
+#define RNDIS_QUERY_REQ_INFOBUFOFFSET \
+ (sizeof(struct rndis_query_req) - \
+ __offsetof(struct rndis_query_req, rm_rid))
+
+struct rndis_query_comp {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_status;
+ uint32_t rm_infobuflen;
+ uint32_t rm_infobufoffset;
+};
+
+/* infobuf offset from the beginning of rndis_query_comp. */
+#define RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(ofs) \
+ ((ofs) + __offsetof(struct rndis_query_req, rm_rid))
+
+/* Send a set object request. */
+#define REMOTE_NDIS_SET_MSG 0x00000005
+#define REMOTE_NDIS_SET_CMPLT 0x80000005
+
+struct rndis_set_req {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_oid;
+ uint32_t rm_infobuflen;
+ uint32_t rm_infobufoffset;
+ uint32_t rm_devicevchdl;
+};
+
+#define RNDIS_SET_REQ_INFOBUFOFFSET \
+ (sizeof(struct rndis_set_req) - \
+ __offsetof(struct rndis_set_req, rm_rid))
+
+struct rndis_set_comp {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_status;
+};
+
+/*
+ * Parameter used by OID_GEN_RNDIS_CONFIG_PARAMETER.
+ */
+#define REMOTE_NDIS_SET_PARAM_NUMERIC 0x00000000
+#define REMOTE_NDIS_SET_PARAM_STRING 0x00000002
+
+struct rndis_set_parameter {
+ uint32_t rm_nameoffset;
+ uint32_t rm_namelen;
+ uint32_t rm_type;
+ uint32_t rm_valueoffset;
+ uint32_t rm_valuelen;
+};
+
+/* Perform a soft reset on the device. */
+#define REMOTE_NDIS_RESET_MSG 0x00000006
+#define REMOTE_NDIS_RESET_CMPLT 0x80000006
+
+struct rndis_reset_req {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+};
+
+struct rndis_reset_comp {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_status;
+ uint32_t rm_adrreset;
+};
+
+/* 802.3 link-state or undefined message error. Sent by device. */
+#define REMOTE_NDIS_INDICATE_STATUS_MSG 0x00000007
+
+struct rndis_status_msg {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_status;
+ uint32_t rm_stbuflen;
+ uint32_t rm_stbufoffset;
+ /* rndis_diag_info */
+};
+
+/* stbuf offset from the beginning of rndis_status_msg. */
+#define RNDIS_STBUFOFFSET_ABS(ofs) \
+ ((ofs) + __offsetof(struct rndis_status_msg, rm_status))
+
+/*
+ * Immediately after rndis_status_msg.rm_stbufoffset, if a control
+ * message is malformatted, or a packet message contains inappropriate
+ * content.
+ */
+struct rndis_diag_info {
+ uint32_t rm_diagstatus;
+ uint32_t rm_erroffset;
+};
+
+/* Keepalive message. May be sent by device. */
+#define REMOTE_NDIS_KEEPALIVE_MSG 0x00000008
+#define REMOTE_NDIS_KEEPALIVE_CMPLT 0x80000008
+
+struct rndis_keepalive_req {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+};
+
+struct rndis_keepalive_comp {
+ uint32_t rm_type;
+ uint32_t rm_len;
+ uint32_t rm_rid;
+ uint32_t rm_status;
+};
+
+/* Packet filter bits used by OID_GEN_CURRENT_PACKET_FILTER */
+#define NDIS_PACKET_TYPE_NONE 0x00000000
+#define NDIS_PACKET_TYPE_DIRECTED 0x00000001
+#define NDIS_PACKET_TYPE_MULTICAST 0x00000002
+#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004
+#define NDIS_PACKET_TYPE_BROADCAST 0x00000008
+#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
+#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020
+#define NDIS_PACKET_TYPE_SMT 0x00000040
+#define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080
+#define NDIS_PACKET_TYPE_GROUP 0x00001000
+#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00002000
+#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00004000
+#define NDIS_PACKET_TYPE_MAC_FRAME 0x00008000
+
+/*
+ * Packet filter description for use with printf(9) %b identifier.
+ */
+#define NDIS_PACKET_TYPES \
+ "\20\1DIRECT\2MULTICAST\3ALLMULTI\4BROADCAST" \
+ "\5SRCROUTE\6PROMISC\7SMT\10ALLLOCAL" \
+ "\11GROUP\12ALLFUNC\13FUNC\14MACFRAME"
+
+/* RNDIS offsets */
+#define RNDIS_HEADER_OFFSET ((uint32_t)sizeof(struct rndis_msghdr))
+#define RNDIS_DATA_OFFSET \
+ ((uint32_t)(sizeof(struct rndis_packet_msg) - RNDIS_HEADER_OFFSET))
+
+#endif /* !_NET_RNDIS_H_ */
Property changes on: trunk/sys/net/rndis.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/route.c
===================================================================
--- trunk/sys/net/route.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/route.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
- * $FreeBSD: stable/9/sys/net/route.c 248895 2013-03-29 16:24:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/route.c 267728 2014-06-22 16:36:14Z tuexen $
*/
/************************************************************************
* Note: In this file a 'fib' is a "forwarding information base" *
@@ -38,6 +38,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"
+#include "opt_sctp.h"
#include "opt_mrouting.h"
#include "opt_mpath.h"
@@ -69,8 +70,7 @@
#include <vm/uma.h>
-/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */
-#define RT_MAXFIBS 16
+#define RT_MAXFIBS UINT16_MAX
/* Kernel config default option. */
#ifdef ROUTETABLES
@@ -87,17 +87,17 @@
#define RT_NUMFIBS 1
#endif
+#if defined(INET) || defined(INET6)
+#ifdef SCTP
+extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
+#endif /* SCTP */
+#endif
+
+
+/* This is read-only.. */
u_int rt_numfibs = RT_NUMFIBS;
SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
-/*
- * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
- * We can't do more because storage is statically allocated for now.
- * (for compatibility reasons.. this will change. When this changes, code should
- * be refactored to protocol independent parts and protocol dependent parts,
- * probably hanging of domain(9) specific storage to not need the full
- * fib * af RNH allocation etc. but allow tuning the number of tables per
- * address family).
- */
+/* and this can be set too big but will be fixed before it is used */
TUNABLE_INT("net.fibs", &rt_numfibs);
/*
@@ -126,7 +126,8 @@
/* compare two sockaddr structures */
-#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
+#define sa_equal(a1, a2) (((a1)->sa_len == (a2)->sa_len) && \
+ (bcmp((a1), (a2), (a1)->sa_len) == 0))
/*
* Convert a 'struct radix_node *' to a 'struct rtentry *'.
@@ -209,7 +210,49 @@
}
SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
+static int
+rtentry_zinit(void *mem, int size, int how)
+{
+ struct rtentry *rt = mem;
+
+ rt->rt_pksent = counter_u64_alloc(how);
+ if (rt->rt_pksent == NULL)
+ return (ENOMEM);
+
+ RT_LOCK_INIT(rt);
+
+ return (0);
+}
+
static void
+rtentry_zfini(void *mem, int size)
+{
+ struct rtentry *rt = mem;
+
+ RT_LOCK_DESTROY(rt);
+ counter_u64_free(rt->rt_pksent);
+}
+
+static int
+rtentry_ctor(void *mem, int size, void *arg, int how)
+{
+ struct rtentry *rt = mem;
+
+ bzero(rt, offsetof(struct rtentry, rt_endzero));
+ counter_u64_zero(rt->rt_pksent);
+
+ return (0);
+}
+
+static void
+rtentry_dtor(void *mem, int size, void *arg)
+{
+ struct rtentry *rt = mem;
+
+ RT_UNLOCK_COND(rt);
+}
+
+static void
vnet_route_init(const void *unused __unused)
{
struct domain *dom;
@@ -220,8 +263,9 @@
V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
- V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
+ V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
+ rtentry_ctor, rtentry_dtor,
+ rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
for (dom = domains; dom; dom = dom->dom_next) {
if (dom->dom_rtattach == NULL)
continue;
@@ -271,6 +315,9 @@
dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset);
}
}
+
+ free(V_rt_tables, M_RTABLE);
+ uma_zdestroy(V_rtzone);
}
VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
vnet_route_uninit, 0);
@@ -495,7 +542,6 @@
/*
* and the rtentry itself of course
*/
- RT_LOCK_DESTROY(rt);
uma_zfree(V_rtzone, rt);
return;
}
@@ -544,7 +590,7 @@
}
/* verify the gateway is directly reachable */
- if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) {
+ if ((ifa = ifa_ifwithnet_fib(gateway, 0, fibnum)) == NULL) {
error = ENETUNREACH;
goto out;
}
@@ -701,7 +747,7 @@
*/
ifa = NULL;
if (flags & RTF_HOST)
- ifa = ifa_ifwithdstaddr(dst);
+ ifa = ifa_ifwithdstaddr_fib(dst, fibnum);
if (ifa == NULL)
ifa = ifa_ifwithaddr(gateway);
} else {
@@ -710,10 +756,10 @@
* or host, the gateway may still be on the
* other end of a pt to pt link.
*/
- ifa = ifa_ifwithdstaddr(gateway);
+ ifa = ifa_ifwithdstaddr_fib(gateway, fibnum);
}
if (ifa == NULL)
- ifa = ifa_ifwithnet(gateway, 0);
+ ifa = ifa_ifwithnet_fib(gateway, 0, fibnum);
if (ifa == NULL) {
struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
if (rt == NULL)
@@ -827,7 +873,7 @@
*/
if (info->rti_ifp == NULL && ifpaddr != NULL &&
ifpaddr->sa_family == AF_LINK &&
- (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) {
+ (ifa = ifa_ifwithnet_fib(ifpaddr, 0, fibnum)) != NULL) {
info->rti_ifp = ifa->ifa_ifp;
ifa_free(ifa);
}
@@ -943,6 +989,57 @@
return (error);
}
+#if 0
+int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
+int rt_print(char *buf, int buflen, struct rtentry *rt);
+
+int
+p_sockaddr(char *buf, int buflen, struct sockaddr *s)
+{
+ void *paddr = NULL;
+
+ switch (s->sa_family) {
+ case AF_INET:
+ paddr = &((struct sockaddr_in *)s)->sin_addr;
+ break;
+ case AF_INET6:
+ paddr = &((struct sockaddr_in6 *)s)->sin6_addr;
+ break;
+ }
+
+ if (paddr == NULL)
+ return (0);
+
+ if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL)
+ return (0);
+
+ return (strlen(buf));
+}
+
+int
+rt_print(char *buf, int buflen, struct rtentry *rt)
+{
+ struct sockaddr *addr, *mask;
+ int i = 0;
+
+ addr = rt_key(rt);
+ mask = rt_mask(rt);
+
+ i = p_sockaddr(buf, buflen, addr);
+ if (!(rt->rt_flags & RTF_HOST)) {
+ buf[i++] = '/';
+ i += p_sockaddr(buf + i, buflen - i, mask);
+ }
+
+ if (rt->rt_flags & RTF_GATEWAY) {
+ buf[i++] = '>';
+ i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway);
+ }
+
+ return (i);
+}
+#endif
+
#ifdef RADIX_MPATH
static int
rn_mpath_update(int req, struct rt_addrinfo *info,
@@ -956,10 +1053,11 @@
register struct radix_node *rn;
int error = 0;
- rn = rnh->rnh_matchaddr(dst, rnh);
+ rn = rnh->rnh_lookup(dst, netmask, rnh);
if (rn == NULL)
return (ESRCH);
rto = rt = RNTORT(rn);
+
rt = rt_mpath_matchgate(rt, gateway);
if (rt == NULL)
return (ESRCH);
@@ -1179,13 +1277,11 @@
} else
ifa_ref(info->rti_ifa);
ifa = info->rti_ifa;
- rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
+ rt = uma_zalloc(V_rtzone, M_NOWAIT);
if (rt == NULL) {
- if (ifa != NULL)
- ifa_free(ifa);
+ ifa_free(ifa);
senderr(ENOBUFS);
}
- RT_LOCK_INIT(rt);
rt->rt_flags = RTF_UP | flags;
rt->rt_fibnum = fibnum;
/*
@@ -1193,9 +1289,7 @@
*/
RT_LOCK(rt);
if ((error = rt_setgate(rt, dst, gateway)) != 0) {
- RT_LOCK_DESTROY(rt);
- if (ifa != NULL)
- ifa_free(ifa);
+ ifa_free(ifa);
uma_zfree(V_rtzone, rt);
senderr(error);
}
@@ -1220,17 +1314,14 @@
*/
rt->rt_ifa = ifa;
rt->rt_ifp = ifa->ifa_ifp;
- rt->rt_rmx.rmx_weight = 1;
+ rt->rt_weight = 1;
#ifdef RADIX_MPATH
/* do not permit exactly the same dst/mask/gw pair */
if (rn_mpath_capable(rnh) &&
rt_mpath_conflict(rnh, rt, netmask)) {
- if (rt->rt_ifa) {
- ifa_free(rt->rt_ifa);
- }
+ ifa_free(rt->rt_ifa);
Free(rt_key(rt));
- RT_LOCK_DESTROY(rt);
uma_zfree(V_rtzone, rt);
senderr(EEXIST);
}
@@ -1296,10 +1387,8 @@
* then un-make it (this should be a function)
*/
if (rn == NULL) {
- if (rt->rt_ifa)
- ifa_free(rt->rt_ifa);
+ ifa_free(rt->rt_ifa);
Free(rt_key(rt));
- RT_LOCK_DESTROY(rt);
uma_zfree(V_rtzone, rt);
#ifdef FLOWTABLE
if (rt0 != NULL)
@@ -1309,18 +1398,7 @@
}
#ifdef FLOWTABLE
else if (rt0 != NULL) {
- switch (dst->sa_family) {
-#ifdef INET6
- case AF_INET6:
- flowtable_route_flush(V_ip6_ft, rt0);
- break;
-#endif
-#ifdef INET
- case AF_INET:
- flowtable_route_flush(V_ip_ft, rt0);
- break;
-#endif
- }
+ flowtable_route_flush(dst->sa_family, rt0);
RTFREE(rt0);
}
#endif
@@ -1464,9 +1542,9 @@
fibnum = RT_DEFAULT_FIB;
break;
}
- if (fibnum == -1) {
+ if (fibnum == RT_ALL_FIBS) {
if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
- startfib = endfib = curthread->td_proc->p_fibnum;
+ startfib = endfib = ifa->ifa_ifp->if_fib;
} else {
startfib = 0;
endfib = rt_numfibs - 1;
@@ -1513,10 +1591,10 @@
/* this table doesn't exist but others might */
continue;
RADIX_NODE_HEAD_RLOCK(rnh);
+ rn = rnh->rnh_lookup(dst, netmask, rnh);
#ifdef RADIX_MPATH
if (rn_mpath_capable(rnh)) {
- rn = rnh->rnh_matchaddr(dst, rnh);
if (rn == NULL)
error = ESRCH;
else {
@@ -1530,17 +1608,14 @@
*/
rt = rt_mpath_matchgate(rt,
ifa->ifa_addr);
- if (!rt)
+ if (rt == NULL)
error = ESRCH;
}
}
- else
#endif
- rn = rnh->rnh_lookup(dst, netmask, rnh);
error = (rn == NULL ||
(rn->rn_flags & RNF_ROOT) ||
- RNTORT(rn)->rt_ifa != ifa ||
- !sa_equal((struct sockaddr *)rn->rn_key, dst));
+ RNTORT(rn)->rt_ifa != ifa);
RADIX_NODE_HEAD_RUNLOCK(rnh);
if (error) {
/* this is only an error if bad on ALL tables */
@@ -1580,7 +1655,7 @@
info.rti_ifa = NULL;
info.rti_flags = RTF_RNH_LOCKED;
- error = rtrequest1_fib(RTM_DELETE, &info, &rt, fibnum);
+ error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
if (error == 0) {
info.rti_ifa = ifa;
info.rti_flags = flags | RTF_RNH_LOCKED |
@@ -1666,15 +1741,6 @@
return (error);
}
-#ifndef BURN_BRIDGES
-/* special one for inet internal use. may not use. */
-int
-rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
-{
- return (rtinit1(ifa, cmd, flags, -1));
-}
-#endif
-
/*
* Set up a routing table entry, normally
* for an interface.
@@ -1695,8 +1761,94 @@
case AF_INET6:
case AF_INET:
/* We do support multiple FIBs. */
- fib = -1;
+ fib = RT_ALL_FIBS;
break;
}
return (rtinit1(ifa, cmd, flags, fib));
}
+
+/*
+ * Announce interface address arrival/withdraw
+ * Returns 0 on success.
+ */
+int
+rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
+{
+
+ KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
+ ("unexpected cmd %d", cmd));
+
+ KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
+ ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
+
+#if defined(INET) || defined(INET6)
+#ifdef SCTP
+ /*
+ * notify the SCTP stack
+ * this will only get called when an address is added/deleted
+ * XXX pass the ifaddr struct instead of ifa->ifa_addr...
+ */
+ sctp_addr_change(ifa, cmd);
+#endif /* SCTP */
+#endif
+ return (rtsock_addrmsg(cmd, ifa, fibnum));
+}
+
+/*
+ * Announce route addition/removal.
+ * Users of this function MUST validate input data BEFORE calling.
+ * However we have to be able to handle invalid data:
+ * if some userland app sends us "invalid" route message (invalid mask,
+ * no dst, wrong address families, etc...) we need to pass it back
+ * to app (and any other rtsock consumers) with rtm_errno field set to
+ * non-zero value.
+ * Returns 0 on success.
+ */
+int
+rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
+ int fibnum)
+{
+
+ KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
+ ("unexpected cmd %d", cmd));
+
+ KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
+ ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
+
+ KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__));
+
+ return (rtsock_routemsg(cmd, ifp, error, rt, fibnum));
+}
+
+void
+rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+{
+
+ rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS);
+}
+
+/*
+ * This is called to generate messages from the routing socket
+ * indicating a network interface has had addresses associated with it.
+ */
+void
+rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
+ int fibnum)
+{
+
+ KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
+ ("unexpected cmd %u", cmd));
+ KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
+ ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
+
+ if (cmd == RTM_ADD) {
+ rt_addrmsg(cmd, ifa, fibnum);
+ if (rt != NULL)
+ rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum);
+ } else {
+ if (rt != NULL)
+ rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum);
+ rt_addrmsg(cmd, ifa, fibnum);
+ }
+}
+
Modified: trunk/sys/net/route.h
===================================================================
--- trunk/sys/net/route.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/route.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,12 +28,14 @@
* SUCH DAMAGE.
*
* @(#)route.h 8.4 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/net/route.h 248895 2013-03-29 16:24:20Z melifaro $
+ * $FreeBSD: stable/10/sys/net/route.h 265717 2014-05-08 21:03:31Z melifaro $
*/
#ifndef _NET_ROUTE_H_
#define _NET_ROUTE_H_
+#include <sys/counter.h>
+
/*
* Kernel resident routing tables.
*
@@ -58,17 +60,6 @@
#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
-/*
- * These numbers are used by reliable protocols for determining
- * retransmission behavior and are included in the routing structure.
- */
-struct rt_metrics_lite {
- u_long rmx_mtu; /* MTU for this path */
- u_long rmx_expire; /* lifetime for route, e.g. redirect */
- u_long rmx_pksent; /* packets sent using this route */
- u_long rmx_weight; /* absolute weight */
-};
-
struct rt_metrics {
u_long rmx_locks; /* Kernel must leave these values alone */
u_long rmx_mtu; /* MTU for this path */
@@ -93,11 +84,9 @@
#define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ))
#define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */
-extern u_int rt_numfibs; /* number fo usable routing tables */
-/*
- * XXX kernel function pointer `rt_output' is visible to applications.
- */
-struct mbuf;
+#define RT_ALL_FIBS -1 /* Announce event for every fib */
+extern u_int rt_numfibs; /* number of usable routing tables */
+extern u_int rt_add_addr_allfibs; /* Announce interfaces to all fibs */
/*
* We distinguish between routes to hosts and routes to networks,
@@ -113,6 +102,8 @@
#include <net/radix_mpath.h>
#endif
#endif
+
+#if defined(_KERNEL) || defined(_WANT_RTENTRY)
struct rtentry {
struct radix_node rt_nodes[2]; /* tree glue, and other values */
/*
@@ -123,34 +114,20 @@
#define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
#define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
struct sockaddr *rt_gateway; /* value */
- int rt_flags; /* up/down?, host/net */
- int rt_refcnt; /* # held references */
struct ifnet *rt_ifp; /* the answer: interface to use */
struct ifaddr *rt_ifa; /* the answer: interface address to use */
- struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */
- u_int rt_fibnum; /* which FIB */
-#ifdef _KERNEL
- /* XXX ugly, user apps use this definition but don't have a mtx def */
- struct mtx rt_mtx; /* mutex for routing entry */
-#endif
+ int rt_flags; /* up/down?, host/net */
+ int rt_refcnt; /* # held references */
+ u_int rt_fibnum; /* which FIB */
+ u_long rt_mtu; /* MTU for this path */
+ u_long rt_weight; /* absolute weight */
+ u_long rt_expire; /* lifetime for route, e.g. redirect */
+#define rt_endzero rt_pksent
+ counter_u64_t rt_pksent; /* packets sent using this route */
+ struct mtx rt_mtx; /* mutex for routing entry */
};
+#endif /* _KERNEL || _WANT_RTENTRY */
-/*
- * Following structure necessary for 4.3 compatibility;
- * We should eventually move it to a compat file.
- */
-struct ortentry {
- u_long rt_hash; /* to speed lookups */
- struct sockaddr rt_dst; /* key */
- struct sockaddr rt_gateway; /* value */
- short rt_flags; /* up/down?, host/net */
- short rt_refcnt; /* # held references */
- u_long rt_use; /* raw # packets forwarded */
- struct ifnet *rt_ifp; /* the answer: interface to use */
-};
-
-#define rt_use rt_rmx.rmx_pksent
-
#define RTF_UP 0x1 /* route usable */
#define RTF_GATEWAY 0x2 /* destination is a gateway */
#define RTF_HOST 0x4 /* host entry (net otherwise) */
@@ -168,12 +145,7 @@
#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */
#define RTF_PROTO2 0x4000 /* protocol specific routing flag */
#define RTF_PROTO1 0x8000 /* protocol specific routing flag */
-
-/* XXX: temporary to stay API/ABI compatible with userland */
-#ifndef _KERNEL
-#define RTF_PRCLONING 0x10000 /* unused, for compatibility */
-#endif
-
+/* 0x10000 unused, was RTF_PRCLONING */
/* 0x20000 unused, was RTF_WASCLONED */
#define RTF_PROTO3 0x40000 /* protocol specific routing flag */
/* 0x80000 unused */
@@ -186,6 +158,9 @@
#define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */
+#define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting
+ with existing routing apps */
+
/* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
#define RTF_FMASK \
(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
@@ -232,8 +207,8 @@
#define RTM_REDIRECT 0x6 /* Told to use different route */
#define RTM_MISS 0x7 /* Lookup failed on this address */
#define RTM_LOCK 0x8 /* fix specified metrics */
-#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */
-#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */
+ /* 0x9 */
+ /* 0xa */
#define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */
#define RTM_NEWADDR 0xc /* address being added to iface */
#define RTM_DELADDR 0xd /* address being removed from iface */
@@ -312,6 +287,10 @@
#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx)
#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx)
#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
+#define RT_UNLOCK_COND(_rt) do { \
+ if (mtx_owned(&(_rt)->rt_mtx)) \
+ mtx_unlock(&(_rt)->rt_mtx); \
+} while (0)
#define RT_ADDREF(_rt) do { \
RT_LOCK_ASSERT(_rt); \
@@ -366,10 +345,15 @@
void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
void rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int);
+int rt_addrmsg(int, struct ifaddr *, int);
+int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
void rt_newmaddrmsg(int, struct ifmultiaddr *);
int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
+int rtsock_addrmsg(int, struct ifaddr *, int);
+int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
+
/*
* Note the following locking behavior:
*
@@ -399,11 +383,6 @@
int rtrequest(int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, int, struct rtentry **);
-#ifndef BURN_BRIDGES
-/* defaults to "all" FIBs */
-int rtinit_fib(struct ifaddr *, int, int);
-#endif
-
/* XXX MRT NEW VERSIONS THAT USE FIBs
 * For now the protocol independent versions are the same as the AF_INET ones
* but this will change..
Modified: trunk/sys/net/rtsock.c
===================================================================
--- trunk/sys/net/rtsock.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/rtsock.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,10 +28,9 @@
* SUCH DAMAGE.
*
* @(#)rtsock.c 8.7 (Berkeley) 10/12/95
- * $FreeBSD: stable/9/sys/net/rtsock.c 248085 2013-03-09 02:36:32Z marius $
+ * $FreeBSD: stable/10/sys/net/rtsock.c 302233 2016-06-27 21:44:27Z bdrewery $
*/
#include "opt_compat.h"
-#include "opt_sctp.h"
#include "opt_mpath.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -53,6 +52,7 @@
#include <sys/sysctl.h>
#include <sys/systm.h>
+#define _IN_NET_RTSOCK_C
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
@@ -64,16 +64,12 @@
#include <netinet/in.h>
#include <netinet/if_ether.h>
+#include <netinet/ip_carp.h>
#ifdef INET6
+#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
-#if defined(INET) || defined(INET6)
-#ifdef SCTP
-extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
-#endif /* SCTP */
-#endif
-
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
@@ -84,8 +80,8 @@
uint8_t ifi_addrlen;
uint8_t ifi_hdrlen;
uint8_t ifi_link_state;
- uint8_t ifi_spare_char1;
- uint8_t ifi_spare_char2;
+ uint8_t ifi_vhid;
+ uint8_t ifi_baudrate_pf;
uint8_t ifi_datalen;
uint32_t ifi_mtu;
uint32_t ifi_metric;
@@ -104,6 +100,7 @@
uint32_t ifi_hwassist;
int32_t ifi_epoch;
struct timeval32 ifi_lastchange;
+ uint32_t ifi_oqdrops;
};
struct if_msghdr32 {
@@ -150,12 +147,14 @@
static struct sockaddr route_src = { 2, PF_ROUTE, };
static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
+/* These are external hooks for CARP. */
+int (*carp_get_vhid_p)(struct ifaddr *);
+
/*
* Used by rtsock/raw_input callback code to decide whether to filter the update
* notification to a socket bound to a particular FIB.
*/
#define RTS_FILTER_FIB M_PROTO8
-#define RTS_ALLFIBS -1
static struct {
int ip_count; /* attached w/ AF_INET */
@@ -190,10 +189,8 @@
static int sysctl_iflist(int af, struct walkarg *w);
static int sysctl_ifmalist(int af, struct walkarg *w);
static int route_output(struct mbuf *m, struct socket *so);
-static void rt_setmetrics(u_long which, const struct rt_metrics *in,
- struct rt_metrics_lite *out);
-static void rt_getmetrics(const struct rt_metrics_lite *in,
- struct rt_metrics *out);
+static void rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt);
+static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
static void rt_dispatch(struct mbuf *, sa_family_t);
static struct netisr_handler rtsock_nh = {
@@ -299,29 +296,18 @@
rts_attach(struct socket *so, int proto, struct thread *td)
{
struct rawcb *rp;
- int s, error;
+ int error;
KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
/* XXX */
rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
- if (rp == NULL)
- return ENOBUFS;
- /*
- * The splnet() is necessary to block protocols from sending
- * error notifications (like RTM_REDIRECT or RTM_LOSING) while
- * this PCB is extant but incompletely initialized.
- * Probably we should try to do more of this work beforehand and
- * eliminate the spl.
- */
- s = splnet();
so->so_pcb = (caddr_t)rp;
so->so_fibnum = td->td_proc->p_fibnum;
error = raw_attach(so, proto);
rp = sotorawcb(so);
if (error) {
- splx(s);
so->so_pcb = NULL;
free(rp, M_PCB);
return error;
@@ -342,7 +328,6 @@
RTSOCK_UNLOCK();
soisconnected(so);
so->so_options |= SO_USELOOPBACK;
- splx(s);
return 0;
}
@@ -570,6 +555,11 @@
struct rtentry *rt = NULL;
struct radix_node_head *rnh;
struct rt_addrinfo info;
+#ifdef INET6
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+ int i, rti_need_deembed = 0;
+#endif
int len, error = 0;
struct ifnet *ifp = NULL;
union sockaddr_union saun;
@@ -600,6 +590,11 @@
rtm->rtm_pid = curproc->p_pid;
bzero(&info, sizeof(info));
info.rti_addrs = rtm->rtm_addrs;
+ /*
+ * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
+ * link-local address because rtrequest requires addresses with
+ * embedded scope id.
+ */
if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
info.rti_info[RTAX_DST] = NULL;
senderr(EINVAL);
@@ -648,8 +643,10 @@
*/
if (gw_ro.ro_rt != NULL &&
gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK &&
- gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
+ gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) {
info.rti_flags &= ~RTF_GATEWAY;
+ info.rti_flags |= RTF_GWFLAG_COMPAT;
+ }
if (gw_ro.ro_rt != NULL)
RTFREE(gw_ro.ro_rt);
}
@@ -666,14 +663,20 @@
if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
(rtm->rtm_flags & RTF_LLDATA) != 0) {
error = lla_rt_output(rtm, &info);
+#ifdef INET6
+ if (error == 0)
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
}
error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
so->so_fibnum);
if (error == 0 && saved_nrt) {
+#ifdef INET6
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
RT_LOCK(saved_nrt);
- rt_setmetrics(rtm->rtm_inits,
- &rtm->rtm_rmx, &saved_nrt->rt_rmx);
+ rt_setmetrics(rtm, saved_nrt);
rtm->rtm_index = saved_nrt->rt_ifp->if_index;
RT_REMREF(saved_nrt);
RT_UNLOCK(saved_nrt);
@@ -687,6 +690,10 @@
(info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
(rtm->rtm_flags & RTF_LLDATA) != 0) {
error = lla_rt_output(rtm, &info);
+#ifdef INET6
+ if (error == 0)
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
}
error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
@@ -696,6 +703,10 @@
rt = saved_nrt;
goto report;
}
+#ifdef INET6
+ /* rt_msg2() will not be used when RTM_DELETE fails. */
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
case RTM_GET:
@@ -705,10 +716,24 @@
info.rti_info[RTAX_DST]->sa_family);
if (rnh == NULL)
senderr(EAFNOSUPPORT);
+
RADIX_NODE_HEAD_RLOCK(rnh);
- rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
- info.rti_info[RTAX_NETMASK], rnh);
- if (rt == NULL) { /* XXX looks bogus */
+
+ if (info.rti_info[RTAX_NETMASK] == NULL &&
+ rtm->rtm_type == RTM_GET) {
+ /*
+ * Provide longest prefix match for
+ * address lookup (no mask).
+ * 'route -n get addr'
+ */
+ rt = (struct rtentry *) rnh->rnh_matchaddr(
+ info.rti_info[RTAX_DST], rnh);
+ } else
+ rt = (struct rtentry *) rnh->rnh_lookup(
+ info.rti_info[RTAX_DST],
+ info.rti_info[RTAX_NETMASK], rnh);
+
+ if (rt == NULL) {
RADIX_NODE_HEAD_RUNLOCK(rnh);
senderr(ESRCH);
}
@@ -765,25 +790,6 @@
RT_ADDREF(rt);
RADIX_NODE_HEAD_RUNLOCK(rnh);
- /*
- * Fix for PR: 82974
- *
- * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
- * returns a perfect match in case a netmask is
- * specified. For host routes only a longest prefix
- * match is returned so it is necessary to compare the
- * existence of the netmask. If both have a netmask
- * rnh_lookup() did a perfect match and if none of them
- * have a netmask both are host routes which is also a
- * perfect match.
- */
-
- if (rtm->rtm_type != RTM_GET &&
- (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
- RT_UNLOCK(rt);
- senderr(ESRCH);
- }
-
switch(rtm->rtm_type) {
case RTM_GET:
@@ -834,8 +840,12 @@
Free(rtm); rtm = new_rtm;
}
(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
- rtm->rtm_flags = rt->rt_flags;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+ rtm->rtm_flags = RTF_GATEWAY |
+ (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+ else
+ rtm->rtm_flags = rt->rt_flags;
+ rt_getmetrics(rt, &rtm->rtm_rmx);
rtm->rtm_addrs = info.rti_addrs;
break;
@@ -886,6 +896,7 @@
RT_UNLOCK(rt);
senderr(error);
}
+ rt->rt_flags &= ~RTF_GATEWAY;
rt->rt_flags |= (RTF_GATEWAY & info.rti_flags);
}
if (info.rti_ifa != NULL &&
@@ -897,8 +908,7 @@
/* Allow some flags to be toggled on change. */
rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
(rtm->rtm_flags & RTF_FMASK);
- rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
- &rt->rt_rmx);
+ rt_setmetrics(rtm, rt);
rtm->rtm_index = rt->rt_ifp->if_index;
if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
@@ -939,6 +949,22 @@
rp = sotorawcb(so);
}
if (rtm) {
+#ifdef INET6
+ if (rti_need_deembed) {
+ /* sin6_scope_id is recovered before sending rtm. */
+ sin6 = (struct sockaddr_in6 *)&ss;
+ for (i = 0; i < RTAX_MAX; i++) {
+ if (info.rti_info[i] == NULL)
+ continue;
+ if (info.rti_info[i]->sa_family != AF_INET6)
+ continue;
+ bcopy(info.rti_info[i], sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ bcopy(sin6, info.rti_info[i],
+ sizeof(*sin6));
+ }
+ }
+#endif
m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
if (m->m_pkthdr.len < rtm->rtm_msglen) {
m_freem(m);
@@ -970,34 +996,30 @@
}
static void
-rt_setmetrics(u_long which, const struct rt_metrics *in,
- struct rt_metrics_lite *out)
+rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt)
{
-#define metric(f, e) if (which & (f)) out->e = in->e;
- /*
- * Only these are stored in the routing entry since introduction
- * of tcp hostcache. The rest is ignored.
- */
- metric(RTV_MTU, rmx_mtu);
- metric(RTV_WEIGHT, rmx_weight);
- /* Userland -> kernel timebase conversion. */
- if (which & RTV_EXPIRE)
- out->rmx_expire = in->rmx_expire ?
- in->rmx_expire - time_second + time_uptime : 0;
-#undef metric
+
+ if (rtm->rtm_inits & RTV_MTU)
+ rt->rt_mtu = rtm->rtm_rmx.rmx_mtu;
+ if (rtm->rtm_inits & RTV_WEIGHT)
+ rt->rt_weight = rtm->rtm_rmx.rmx_weight;
+ /* Kernel -> userland timebase conversion. */
+ if (rtm->rtm_inits & RTV_EXPIRE)
+ rt->rt_expire = rtm->rtm_rmx.rmx_expire ?
+ rtm->rtm_rmx.rmx_expire - time_second + time_uptime : 0;
}
static void
-rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
+rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
{
-#define metric(e) out->e = in->e;
+
bzero(out, sizeof(*out));
- metric(rmx_mtu);
- metric(rmx_weight);
+ out->rmx_mtu = rt->rt_mtu;
+ out->rmx_weight = rt->rt_weight;
+ out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
/* Kernel -> userland timebase conversion. */
- out->rmx_expire = in->rmx_expire ?
- in->rmx_expire - time_uptime + time_second : 0;
-#undef metric
+ out->rmx_expire = rt->rt_expire ?
+ rt->rt_expire - time_uptime + time_second : 0;
}
/*
@@ -1032,6 +1054,11 @@
return (0); /* should be EINVAL but for compat */
}
/* accept it */
+#ifdef INET6
+ if (sa->sa_family == AF_INET6)
+ sa6_embedscope((struct sockaddr_in6 *)sa,
+ V_ip6_use_defzone);
+#endif
rtinfo->rti_info[i] = sa;
cp += SA_SIZE(sa);
}
@@ -1048,6 +1075,10 @@
struct mbuf *m;
int i;
struct sockaddr *sa;
+#ifdef INET6
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+#endif
int len, dlen;
switch (type) {
@@ -1074,20 +1105,17 @@
default:
len = sizeof(struct rt_msghdr);
}
- if (len > MCLBYTES)
- panic("rt_msg1");
- m = m_gethdr(M_DONTWAIT, MT_DATA);
- if (m && len > MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+
+ /* XXXGL: can we use MJUMPAGESIZE cluster here? */
+ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
+ if (len > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (m);
+
m->m_pkthdr.len = m->m_len = len;
- m->m_pkthdr.rcvif = NULL;
rtm = mtod(m, struct rt_msghdr *);
bzero((caddr_t)rtm, len);
for (i = 0; i < RTAX_MAX; i++) {
@@ -1095,6 +1123,14 @@
continue;
rtinfo->rti_addrs |= (1 << i);
dlen = SA_SIZE(sa);
+#ifdef INET6
+ if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ bcopy(sa, sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ sa = (struct sockaddr *)sin6;
+ }
+#endif
m_copyback(m, len, dlen, (caddr_t)sa);
len += dlen;
}
@@ -1117,6 +1153,10 @@
int i;
int len, dlen, second_time = 0;
caddr_t cp0;
+#ifdef INET6
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+#endif
rtinfo->rti_addrs = 0;
again:
@@ -1169,6 +1209,14 @@
rtinfo->rti_addrs |= (1 << i);
dlen = SA_SIZE(sa);
if (cp) {
+#ifdef INET6
+ if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ bcopy(sa, sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ sa = (struct sockaddr *)sin6;
+ }
+#endif
bcopy((caddr_t)sa, cp, (unsigned)dlen);
cp += dlen;
}
@@ -1224,7 +1272,7 @@
if (m == NULL)
return;
- if (fibnum != RTS_ALLFIBS) {
+ if (fibnum != RT_ALL_FIBS) {
KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
"of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
M_SETFIB(m, fibnum);
@@ -1242,7 +1290,7 @@
rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
{
- rt_missmsg_fib(type, rtinfo, flags, error, RTS_ALLFIBS);
+ rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
}
/*
@@ -1271,89 +1319,92 @@
}
/*
- * This is called to generate messages from the routing socket
- * indicating a network interface has had addresses associated with it.
- * if we ever reverse the logic and replace messages TO the routing
- * socket indicate a request to configure interfaces, then it will
- * be unnecessary as the routing socket will automatically generate
- * copies of it.
+ * Announce interface address arrival/withdraw.
+ * Please do not call directly, use rt_addrmsg().
+ * Assume input data to be valid.
+ * Returns 0 on success.
*/
-void
-rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
- int fibnum)
+int
+rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
{
struct rt_addrinfo info;
- struct sockaddr *sa = NULL;
- int pass;
- struct mbuf *m = NULL;
+ struct sockaddr *sa;
+ int ncmd;
+ struct mbuf *m;
+ struct ifa_msghdr *ifam;
struct ifnet *ifp = ifa->ifa_ifp;
- KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
- ("unexpected cmd %u", cmd));
-#if defined(INET) || defined(INET6)
-#ifdef SCTP
- /*
- * notify the SCTP stack
- * this will only get called when an address is added/deleted
- * XXX pass the ifaddr struct instead if ifa->ifa_addr...
- */
- sctp_addr_change(ifa, cmd);
-#endif /* SCTP */
-#endif
if (route_cb.any_count == 0)
- return;
- for (pass = 1; pass < 3; pass++) {
- bzero((caddr_t)&info, sizeof(info));
- if ((cmd == RTM_ADD && pass == 1) ||
- (cmd == RTM_DELETE && pass == 2)) {
- struct ifa_msghdr *ifam;
- int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
+ return (0);
- info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
- info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
- info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
- info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
- if ((m = rt_msg1(ncmd, &info)) == NULL)
- continue;
- ifam = mtod(m, struct ifa_msghdr *);
- ifam->ifam_index = ifp->if_index;
- ifam->ifam_metric = ifa->ifa_metric;
- ifam->ifam_flags = ifa->ifa_flags;
- ifam->ifam_addrs = info.rti_addrs;
- }
- if ((cmd == RTM_ADD && pass == 2) ||
- (cmd == RTM_DELETE && pass == 1)) {
- struct rt_msghdr *rtm;
+ ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
- if (rt == NULL)
- continue;
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
- info.rti_info[RTAX_DST] = sa = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- if ((m = rt_msg1(cmd, &info)) == NULL)
- continue;
- rtm = mtod(m, struct rt_msghdr *);
- rtm->rtm_index = ifp->if_index;
- rtm->rtm_flags |= rt->rt_flags;
- rtm->rtm_errno = error;
- rtm->rtm_addrs = info.rti_addrs;
- }
- if (fibnum != RTS_ALLFIBS) {
- KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: "
- "fibnum out of range 0 <= %d < %d", __func__,
- fibnum, rt_numfibs));
- M_SETFIB(m, fibnum);
- m->m_flags |= RTS_FILTER_FIB;
- }
- rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
+ info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
+ info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+ info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
+ if ((m = rt_msg1(ncmd, &info)) == NULL)
+ return (ENOBUFS);
+ ifam = mtod(m, struct ifa_msghdr *);
+ ifam->ifam_index = ifp->if_index;
+ ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_addrs = info.rti_addrs;
+
+ if (fibnum != RT_ALL_FIBS) {
+ M_SETFIB(m, fibnum);
+ m->m_flags |= RTS_FILTER_FIB;
}
+
+ rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+
+ return (0);
}
-void
-rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+/*
+ * Announce route addition/removal.
+ * Please do not call directly, use rt_routemsg().
+ * Note that @rt data MAY be inconsistent/invalid:
+ * if some userland app sends us "invalid" route message (invalid mask,
+ * no dst, wrong address families, etc...) we need to pass it back
+ * to app (and any other rtsock consumers) with rtm_errno field set to
+ * non-zero value.
+ *
+ * Returns 0 on success.
+ */
+int
+rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
+ int fibnum)
{
+ struct rt_addrinfo info;
+ struct sockaddr *sa;
+ struct mbuf *m;
+ struct rt_msghdr *rtm;
- rt_newaddrmsg_fib(cmd, ifa, error, rt, RTS_ALLFIBS);
+ if (route_cb.any_count == 0)
+ return (0);
+
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info.rti_info[RTAX_DST] = sa = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ if ((m = rt_msg1(cmd, &info)) == NULL)
+ return (ENOBUFS);
+ rtm = mtod(m, struct rt_msghdr *);
+ rtm->rtm_index = ifp->if_index;
+ rtm->rtm_flags |= rt->rt_flags;
+ rtm->rtm_errno = error;
+ rtm->rtm_addrs = info.rti_addrs;
+
+ if (fibnum != RT_ALL_FIBS) {
+ M_SETFIB(m, fibnum);
+ m->m_flags |= RTS_FILTER_FIB;
+ }
+
+ rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
+
+ return (0);
}
/*
@@ -1530,12 +1581,12 @@
if (w->w_req && w->w_tmem) {
struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
- rtm->rtm_flags = rt->rt_flags;
- /*
- * let's be honest about this being a retarded hack
- */
- rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+ rtm->rtm_flags = RTF_GATEWAY |
+ (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+ else
+ rtm->rtm_flags = rt->rt_flags;
+ rt_getmetrics(rt, &rtm->rtm_rmx);
rtm->rtm_index = rt->rt_ifp->if_index;
rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
rtm->rtm_addrs = info.rti_addrs;
@@ -1556,6 +1607,8 @@
CP(*src, *dst, ifi_addrlen);
CP(*src, *dst, ifi_hdrlen);
CP(*src, *dst, ifi_link_state);
+ CP(*src, *dst, ifi_vhid);
+ CP(*src, *dst, ifi_baudrate_pf);
dst->ifi_datalen = sizeof(struct if_data32);
CP(*src, *dst, ifi_mtu);
CP(*src, *dst, ifi_metric);
@@ -1596,6 +1649,11 @@
ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifm32->ifm_data.ifi_vhid =
+ (*carp_get_vhid_p)(ifp->if_addr);
+ ifm32->ifm_data.ifi_oqdrops = ifp->if_snd.ifq_drops;
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
}
@@ -1609,7 +1667,13 @@
ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
ifm->ifm_data = ifp->if_data;
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr);
+ ifm->ifm_data.ifi_datalen += sizeof(u_long);
+ ifm->ifi_oqdrops = ifp->if_snd.ifq_drops;
+
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
}
@@ -1629,6 +1693,10 @@
ifm32->ifm_index = ifp->if_index;
copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifm32->ifm_data.ifi_vhid =
+ (*carp_get_vhid_p)(ifp->if_addr);
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
}
@@ -1639,6 +1707,9 @@
ifm->ifm_index = ifp->if_index;
ifm->ifm_data = ifp->if_data;
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr);
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
}
@@ -1664,6 +1735,9 @@
ifam32->ifam_metric = ifa->ifa_metric;
copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifam32->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);
return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len));
}
@@ -1679,6 +1753,9 @@
ifam->ifam_metric = ifa->ifa_metric;
ifam->ifam_data = ifa->if_data;
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifam->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);
return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
}
@@ -1707,7 +1784,7 @@
int len, error = 0;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
@@ -1752,7 +1829,7 @@
done:
if (ifp != NULL)
IF_ADDR_RUNLOCK(ifp);
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (error);
}
@@ -1766,7 +1843,7 @@
struct ifaddr *ifa;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
@@ -1801,7 +1878,7 @@
IF_ADDR_RUNLOCK(ifp);
}
done:
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (error);
}
@@ -1812,6 +1889,7 @@
u_int namelen = arg2;
struct radix_node_head *rnh = NULL; /* silence compiler. */
int i, lim, error = EINVAL;
+ int fib = 0;
u_char af;
struct walkarg w;
@@ -1819,7 +1897,17 @@
namelen--;
if (req->newptr)
return (EPERM);
- if (namelen != 3)
+ if (name[1] == NET_RT_DUMP) {
+ if (namelen == 3)
+ fib = req->td->td_proc->p_fibnum;
+ else if (namelen == 4)
+ fib = (name[3] == RT_ALL_FIBS) ?
+ req->td->td_proc->p_fibnum : name[3];
+ else
+ return ((namelen < 3) ? EISDIR : ENOTDIR);
+ if (fib < 0 || fib >= rt_numfibs)
+ return (EINVAL);
+ } else if (namelen != 3)
return ((namelen < 3) ? EISDIR : ENOTDIR);
af = name[0];
if (af > AF_MAX)
@@ -1858,7 +1946,7 @@
* take care of routing entries
*/
for (error = 0; error == 0 && i <= lim; i++) {
- rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i);
+ rnh = rt_tables_get_rnh(fib, i);
if (rnh != NULL) {
RADIX_NODE_HEAD_RLOCK(rnh);
error = rnh->rnh_walktree(rnh,
Added: trunk/sys/net/sff8436.h
===================================================================
--- trunk/sys/net/sff8436.h (rev 0)
+++ trunk/sys/net/sff8436.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,214 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Yandex LLC.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/sff8436.h 294202 2016-01-17 05:38:03Z melifaro $
+ */
+
+/*
+ * The following set of constants are from Document SFF-8436
+ * "QSFP+ 10 Gbs 4X PLUGGABLE TRANSCEIVER" revision 4.8 dated October 31, 2013
+ *
+ * This SFF standard defines the following QSFP+ memory address module:
+ *
+ * 1) 256-byte addressable block and 128-byte pages
+ * 2) Lower 128-bytes addresses always refer to the same page
+ * 3) Upper address space may refer to different pages depending on
+ * "page select" byte value.
+ *
+ * Map description:
+ *
+ * Serial address 0xA02:
+ *
+ * Lower bits
+ * 0-127 Monitoring data & page select byte
+ * 128-255:
+ *
+ * Page 00:
+ * 128-191 Base ID Fields
+ * 191-223 Extended ID
+ * 223-255 Vendor Specific ID
+ *
+ * Page 01 (optional):
+ * 128-255 App-specific data
+ *
+ * Page 02 (optional):
+ * 128-255 User EEPROM Data
+ *
+ * Page 03 (optional for Cable Assemblies)
+ * 128-223 Thresholds
+ * 225-237 Vendor Specific
+ * 238-253 Channel Controls/Monitor
+ * 254-255 Reserved
+ *
+ * All these values are read across an I2C (i squared C) bus.
+ */
+
+#define SFF_8436_BASE 0xA0 /* Base address for all requests */
+
+/* Table 17 - Lower Memory Map */
+enum {
+ SFF_8436_MID = 0, /* Copy of SFF_8436_ID field */
+ SFF_8436_STATUS = 1, /* 2-bytes status (Table 18) */
+ SFF_8436_INTR_START = 3, /* Interrupt flags (Tables 19-21) */
+ SFF_8436_INTR_END = 21,
+ SFF_8436_MODMON_START = 22, /* Module monitors (Table 22) */
+ SFF_8436_TEMP = 22, /* Internally measured module temp */
+ SFF_8436_VCC = 26, /* Internally measured module
+ * supplied voltage */
+ SFF_8436_MODMON_END = 33,
+ SFF_8436_CHMON_START = 34, /* Channel monitors (Table 23) */
+ SFF_8436_RX_CH1_MSB = 34, /* Internally measured RX input power */
+ SFF_8436_RX_CH1_LSB = 35, /* for channel 1 */
+ SFF_8436_RX_CH2_MSB = 36, /* Internally measured RX input power */
+ SFF_8436_RX_CH2_LSB = 37, /* for channel 2 */
+ SFF_8436_RX_CH3_MSB = 38, /* Internally measured RX input power */
+ SFF_8436_RX_CH3_LSB = 39, /* for channel 3 */
+ SFF_8436_RX_CH4_MSB = 40, /* Internally measured RX input power */
+ SFF_8436_RX_CH4_LSB = 41, /* for channel 4 */
+ SFF_8436_TX_CH1_MSB = 42, /* Internally measured TX bias */
+ SFF_8436_TX_CH1_LSB = 43, /* for channel 1 */
+ SFF_8436_TX_CH2_MSB = 44, /* Internally measured TX bias */
+ SFF_8436_TX_CH2_LSB = 45, /* for channel 2 */
+ SFF_8436_TX_CH3_MSB = 46, /* Internally measured TX bias */
+ SFF_8436_TX_CH3_LSB = 47, /* for channel 3 */
+ SFF_8436_TX_CH4_MSB = 48, /* Internally measured TX bias */
+ SFF_8436_TX_CH4_LSB = 49, /* for channel 4 */
+ SFF_8436_CHANMON_END = 81,
+ SFF_8436_CONTROL_START = 86, /* Control (Table 24) */
+ SFF_8436_CONTROL_END = 97,
+ SFF_8436_MASKS_START = 100, /* Module/channel masks (Table 25) */
+ SFF_8436_MASKS_END = 106,
+ SFF_8436_CHPASSWORD = 119, /* Password change entry (4 bytes) */
+ SFF_8436_PASSWORD = 123, /* Password entry area (4 bytes) */
+ SFF_8436_PAGESEL = 127, /* Page select byte */
+};
+
+/* Table 18 - Status Indicators bits */
+/* Byte 1: all bits reserved */
+
+/* Byte 2 bits */
+#define SFF_8436_STATUS_FLATMEM (1 << 2) /* Upper memory flat or paged
+ * 0 = paging, 1=Page 0 only */
+#define SFF_8436_STATUS_INTL (1 << 1) /* Digital state of the intL
+ * Interrupt output pin */
+#define SFF_8436_STATUS_NOTREADY 1 /* Module has not yet achieved
+ * power up and memory data is not
+ * ready. 0=data is ready */
+/*
+ * Upper page 0 definitions:
+ * Table 29 - Serial ID: Data fields.
+ *
+ * Note that this table is mostly the same as used in SFF-8472.
+ * The only difference is address shift: +128 bytes.
+ */
+enum {
+ SFF_8436_ID = 128, /* Module Type (defined in sff8472.h) */
+ SFF_8436_EXT_ID = 129, /* Extended transceiver type
+ * (Table 31) */
+ SFF_8436_CONNECTOR = 130, /* Connector type (Table 32) */
+ SFF_8436_TRANS_START = 131, /* Electric or Optical Compatibility
+ * (Table 33) */
+ SFF_8436_CODE_E1040100G = 131, /* 10/40/100G Ethernet Compliance Code */
+ SFF_8436_CODE_SONET = 132, /* SONET Compliance codes */
+ SFF_8436_CODE_SATA = 133, /* SAS/SATA compliance codes */
+ SFF_8436_CODE_E1G = 134, /* Gigabit Ethernet Compliant codes */
+ SFF_8436_CODE_FC_START = 135, /* FC link/media/speed */
+ SFF_8436_CODE_FC_END = 138,
+ SFF_8436_TRANS_END = 138,
+ SFF_8436_ENCODING = 139, /* Encoding Code for high speed
+ * serial encoding algorithm (see
+ * Table 34) */
+ SFF_8436_BITRATE = 140, /* Nominal signaling rate, units
+ * of 100MBd. */
+ SFF_8436_RATEID = 141, /* Extended RateSelect Compliance
+ * (see Table 35) */
+ SFF_8436_LEN_SMF_KM = 142, /* Link length supported for single
+ * mode fiber, units of km */
+ SFF_8436_LEN_OM3 = 143, /* Link length supported for 850nm
+ * 50um multimode fiber, units of 2 m */
+ SFF_8436_LEN_OM2 = 144, /* Link length supported for 50 um
+ * OM2 fiber, units of 1 m */
+ SFF_8436_LEN_OM1 = 145, /* Link length supported for 1310 nm
+ * 50um multi-mode fiber, units of 1m*/
+ SFF_8436_LEN_ASM = 144, /* Link length of passive cable assembly
+ * Length is specified as in the INF
+ * 8074, units of 1m. 0 means this is
+ * not a valid assembly. Value of 255
+ * means that the Module supports length
+ * greater than 254 m. */
+ SFF_8436_DEV_TECH = 147, /* Device/transmitter technology,
+ * see Table 36/37 */
+ SFF_8436_VENDOR_START = 148, /* Vendor name, 16 bytes, padded
+ * right with 0x20 */
+ SFF_8436_VENDOR_END = 163,
+ SFF_8436_EXTMODCODE = 164, /* Extended module code, Table 164 */
+ SFF_8436_VENDOR_OUI_START = 165 , /* Vendor OUI SFP vendor IEEE
+ * company ID */
+ SFF_8436_VENDOR_OUI_END = 167,
+ SFF_8436_PN_START = 168, /* Vendor PN, padded right with 0x20 */
+ SFF_8436_PN_END = 183,
+ SFF_8436_REV_START = 184, /* Vendor Revision, padded right 0x20 */
+ SFF_8436_REV_END = 185,
+ SFF_8436_WAVELEN_START = 186, /* Wavelength Laser wavelength
+ * (Passive/Active Cable
+ * Specification Compliance) */
+ SFF_8436_WAVELEN_END = 189,
+ SFF_8436_MAX_CASE_TEMP = 190, /* Allows to specify maximum temp
+ * above 70C. Maximum case temperature is
+ * an 8-bit value in Degrees C. A value
+ *of 0 implies the standard 70C rating.*/
+ SFF_8436_CC_BASE = 191, /* CC_BASE Check code for Base ID
+ * Fields (first 63 bytes) */
+ /* Extended ID fields */
+ SFF_8436_OPTIONS_START = 192, /* Options Indicates which optional
+ * transceiver signals are
+ * implemented (see Table 39) */
+ SFF_8436_OPTIONS_END = 195,
+ SFF_8436_SN_START = 196, /* Vendor SN, right padded with 0x20 */
+ SFF_8436_SN_END = 211,
+ SFF_8436_DATE_START = 212, /* Vendor’s manufacturing date code
+ * (see Table 40) */
+ SFF_8436_DATE_END = 219,
+ SFF_8436_DIAG_TYPE = 220, /* Diagnostic Monitoring Type
+ * Indicates which type of
+ * diagnostic monitoring is
+ * implemented (if any) in the
+ * transceiver (see Table 41) */
+
+ SFF_8436_ENHANCED = 221, /* Enhanced Options Indicates which
+ * optional features are implemented
+ * (if any) in the transceiver
+ * (see Table 42) */
+ SFF_8636_BITRATE = 222, /* Nominal bit rate per channel, units
+ * of 250 Mbps */
+ SFF_8436_CC_EXT = 223, /* Check code for the Extended ID
+ * Fields (bytes 192-222 incl) */
+ SFF_8436_VENDOR_RSRVD_START = 224,
+ SFF_8436_VENDOR_RSRVD_END = 255,
+};
+
+
Property changes on: trunk/sys/net/sff8436.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/net/sff8472.h
===================================================================
--- trunk/sys/net/sff8472.h (rev 0)
+++ trunk/sys/net/sff8472.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -0,0 +1,509 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 George V. Neville-Neil
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/net/sff8472.h 286810 2015-08-15 17:52:55Z melifaro $
+ */
+
+/*
+ * The following set of constants are from Document SFF-8472
+ * "Diagnostic Monitoring Interface for Optical Transceivers" revision
+ * 11.3 published by the SFF Committee on June 11, 2013
+ *
+ * The SFF standard defines two ranges of addresses, each 255 bytes
+ * long for the storage of data and diagnostics on cables, such as
+ * SFP+ optics and TwinAx cables. The ranges are defined in the
+ * following way:
+ *
+ * Base Address 0xa0 (Identification Data)
+ * 0-95 Serial ID Defined by SFP MSA
+ * 96-127 Vendor Specific Data
+ * 128-255 Reserved
+ *
+ * Base Address 0xa2 (Diagnostic Data)
+ * 0-55 Alarm and Warning Thresholds
+ * 56-95 Cal Constants
+ * 96-119 Real Time Diagnostic Interface
+ * 120-127 Vendor Specific
+ * 128-247 User Writable EEPROM
+ * 248-255 Vendor Specific
+ *
+ * Note that not all addresses are supported. Where support is
+ * optional this is noted and instructions for checking for the
+ * support are supplied.
+ *
+ * All these values are read across an I2C (i squared C) bus. Any
+ * device wishing to read these addresses must first have support for
+ * i2c calls. The Chelsio T4/T5 driver (dev/cxgbe) is one such
+ * driver.
+ */
+
+
+/* Table 3.1 Two-wire interface ID: Data Fields */
+
+enum {
+ SFF_8472_BASE = 0xa0, /* Base address for all our queries. */
+ SFF_8472_ID = 0, /* Transceiver Type (Table 3.2) */
+ SFF_8472_EXT_ID = 1, /* Extended transceiver type (Table 3.3) */
+ SFF_8472_CONNECTOR = 2, /* Connector type (Table 3.4) */
+ SFF_8472_TRANS_START = 3, /* Elec or Optical Compatibility
+ * (Table 3.5) */
+ SFF_8472_TRANS_END = 10,
+ SFF_8472_ENCODING = 11, /* Encoding Code for high speed
+ * serial encoding algorithm (see
+ * Table 3.6) */
+ SFF_8472_BITRATE = 12, /* Nominal signaling rate, units
+ * of 100MBd. (see details for
+ * rates > 25.0Gb/s) */
+ SFF_8472_RATEID = 13, /* Type of rate select
+ * functionality (see Table
+ * 3.6a) */
+ SFF_8472_LEN_SMF_KM = 14, /* Link length supported for single
+ * mode fiber, units of km */
+ SFF_8472_LEN_SMF = 15, /* Link length supported for single
+ * mode fiber, units of 100 m */
+ SFF_8472_LEN_50UM = 16, /* Link length supported for 50 um
+ * OM2 fiber, units of 10 m */
+ SFF_8472_LEN_625UM = 17, /* Link length supported for 62.5
+ * um OM1 fiber, units of 10 m */
+ SFF_8472_LEN_OM4 = 18, /* Link length supported for 50um
+ * OM4 fiber, units of 10m.
+ * Alternatively copper or direct
+ * attach cable, units of m */
+ SFF_8472_LEN_OM3 = 19, /* Link length supported for 50 um OM3 fiber, units of 10 m */
+ SFF_8472_VENDOR_START = 20, /* Vendor name [Address A0h, Bytes
+ * 20-35] */
+ SFF_8472_VENDOR_END = 35,
+ SFF_8472_TRANS = 36, /* Transceiver Code for electronic
+ * or optical compatibility (see
+ * Table 3.5) */
+ SFF_8472_VENDOR_OUI_START = 37, /* Vendor OUI SFP vendor IEEE
+ * company ID */
+ SFF_8472_VENDOR_OUI_END = 39,
+ SFF_8472_PN_START = 40, /* Vendor PN */
+ SFF_8472_PN_END = 55,
+ SFF_8472_REV_START = 56, /* Vendor Revision */
+ SFF_8472_REV_END = 59,
+ SFF_8472_WAVELEN_START = 60, /* Wavelength Laser wavelength
+ * (Passive/Active Cable
+ * Specification Compliance) */
+ SFF_8472_WAVELEN_END = 61,
+ SFF_8472_CC_BASE = 63, /* CC_BASE Check code for Base ID
+ * Fields (addresses 0 to 62) */
+
+/*
+ * Extension Fields (optional) check the options before reading other
+ * addresses.
+ */
+ SFF_8472_OPTIONS_MSB = 64, /* Options Indicates which optional
+ * transceiver signals are
+ * implemented */
+ SFF_8472_OPTIONS_LSB = 65, /* (see Table 3.7) */
+ SFF_8472_BR_MAX = 66, /* BR max Upper bit rate margin,
+ * units of % (see details for
+ * rates > 25.0Gb/s) */
+ SFF_8472_BR_MIN = 67, /* Lower bit rate margin, units of
+ * % (see details for rates >
+ * 25.0Gb/s) */
+ SFF_8472_SN_START = 68, /* Vendor SN [Address A0h, Bytes 68-83] */
+ SFF_8472_SN_END = 83,
+ SFF_8472_DATE_START = 84, /* Date code Vendor’s manufacturing
+ * date code (see Table 3.8) */
+ SFF_8472_DATE_END = 91,
+ SFF_8472_DIAG_TYPE = 92, /* Diagnostic Monitoring Type
+ * Indicates which type of
+ * diagnostic monitoring is
+ * implemented (if any) in the
+ * transceiver (see Table 3.9)
+ */
+
+ SFF_8472_ENHANCED = 93, /* Enhanced Options Indicates which
+ * optional enhanced features are
+ * implemented (if any) in the
+ * transceiver (see Table 3.10) */
+ SFF_8472_COMPLIANCE = 94, /* SFF-8472 Compliance Indicates
+ * which revision of SFF-8472 the
+ * transceiver complies with. (see
+ * Table 3.12)*/
+ SFF_8472_CC_EXT = 95, /* Check code for the Extended ID
+ * Fields (addresses 64 to 94)
+ */
+
+ SFF_8472_VENDOR_RSRVD_START = 96,
+ SFF_8472_VENDOR_RSRVD_END = 127,
+
+ SFF_8472_RESERVED_START = 128,
+ SFF_8472_RESERVED_END = 255
+};
+
+#define SFF_8472_DIAG_IMPL (1 << 6) /* Required to be 1 */
+#define SFF_8472_DIAG_INTERNAL (1 << 5) /* Internal measurements. */
+#define SFF_8472_DIAG_EXTERNAL (1 << 4) /* External measurements. */
+#define SFF_8472_DIAG_POWER (1 << 3) /* Power measurement type */
+#define SFF_8472_DIAG_ADDR_CHG (1 << 2) /* Address change required.
+ * See SFF-8472 doc. */
+
+ /*
+ * Diagnostics are available at the two wire address 0xa2. All
+ * diagnostics are OPTIONAL so you should check 0xa0 registers 92 to
+ * see which, if any are supported.
+ */
+
+enum {SFF_8472_DIAG = 0xa2}; /* Base address for diagnostics. */
+
+ /*
+ * Table 3.15 Alarm and Warning Thresholds All values are 2 bytes
+ * and MUST be read in a single read operation starting at the MSB
+ */
+
+enum {
+ SFF_8472_TEMP_HIGH_ALM = 0, /* Temp High Alarm */
+ SFF_8472_TEMP_LOW_ALM = 2, /* Temp Low Alarm */
+ SFF_8472_TEMP_HIGH_WARN = 4, /* Temp High Warning */
+ SFF_8472_TEMP_LOW_WARN = 6, /* Temp Low Warning */
+ SFF_8472_VOLTAGE_HIGH_ALM = 8, /* Voltage High Alarm */
+ SFF_8472_VOLTAGE_LOW_ALM = 10, /* Voltage Low Alarm */
+ SFF_8472_VOLTAGE_HIGH_WARN = 12, /* Voltage High Warning */
+ SFF_8472_VOLTAGE_LOW_WARN = 14, /* Voltage Low Warning */
+ SFF_8472_BIAS_HIGH_ALM = 16, /* Bias High Alarm */
+ SFF_8472_BIAS_LOW_ALM = 18, /* Bias Low Alarm */
+ SFF_8472_BIAS_HIGH_WARN = 20, /* Bias High Warning */
+ SFF_8472_BIAS_LOW_WARN = 22, /* Bias Low Warning */
+ SFF_8472_TX_POWER_HIGH_ALM = 24, /* TX Power High Alarm */
+ SFF_8472_TX_POWER_LOW_ALM = 26, /* TX Power Low Alarm */
+ SFF_8472_TX_POWER_HIGH_WARN = 28, /* TX Power High Warning */
+ SFF_8472_TX_POWER_LOW_WARN = 30, /* TX Power Low Warning */
+ SFF_8472_RX_POWER_HIGH_ALM = 32, /* RX Power High Alarm */
+ SFF_8472_RX_POWER_LOW_ALM = 34, /* RX Power Low Alarm */
+ SFF_8472_RX_POWER_HIGH_WARN = 36, /* RX Power High Warning */
+ SFF_8472_RX_POWER_LOW_WARN = 38, /* RX Power Low Warning */
+
+ SFF_8472_RX_POWER4 = 56, /* Rx_PWR(4) Single precision
+ * floating point calibration data
+ * - Rx optical power. Bit 7 of
+ * byte 56 is MSB. Bit 0 of byte
+ * 59 is LSB. Rx_PWR(4) should be
+ * set to zero for “internally
+ * calibrated” devices. */
+ SFF_8472_RX_POWER3 = 60, /* Rx_PWR(3) Single precision
+ * floating point calibration data
+ * - Rx optical power. Bit 7 of
+ * byte 60 is MSB. Bit 0 of byte 63
+ * is LSB. Rx_PWR(3) should be set
+ * to zero for “internally
+ * calibrated” devices.*/
+ SFF_8472_RX_POWER2 = 64, /* Rx_PWR(2) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 64 is MSB, bit 0 of byte 67 is
+ * LSB. Rx_PWR(2) should be set to
+ * zero for “internally calibrated”
+ * devices. */
+ SFF_8472_RX_POWER1 = 68, /* Rx_PWR(1) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 68 is MSB, bit 0 of byte 71 is
+ * LSB. Rx_PWR(1) should be set to
+ * 1 for “internally calibrated”
+ * devices. */
+ SFF_8472_RX_POWER0 = 72, /* Rx_PWR(0) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 72 is MSB, bit 0 of byte 75 is
+ * LSB. Rx_PWR(0) should be set to
+ * zero for “internally calibrated”
+ * devices. */
+ SFF_8472_TX_I_SLOPE = 76, /* Tx_I(Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * laser bias current. Bit 7 of
+ * byte 76 is MSB, bit 0 of byte 77
+ * is LSB. Tx_I(Slope) should be
+ * set to 1 for “internally
+ * calibrated” devices. */
+ SFF_8472_TX_I_OFFSET = 78, /* Tx_I(Offset) Fixed decimal
+ * (signed two’s complement)
+ * calibration data, laser bias
+ * current. Bit 7 of byte 78 is
+ * MSB, bit 0 of byte 79 is
+ * LSB. Tx_I(Offset) should be set
+ * to zero for “internally
+ * calibrated” devices. */
+ SFF_8472_TX_POWER_SLOPE = 80, /* Tx_PWR(Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * transmitter coupled output
+ * power. Bit 7 of byte 80 is MSB,
+ * bit 0 of byte 81 is LSB.
+ * Tx_PWR(Slope) should be set to 1
+ * for “internally calibrated”
+ * devices. */
+ SFF_8472_TX_POWER_OFFSET = 82, /* Tx_PWR(Offset) Fixed decimal
+ * (signed two’s complement)
+ * calibration data, transmitter
+ * coupled output power. Bit 7 of
+ * byte 82 is MSB, bit 0 of byte 83
+ * is LSB. Tx_PWR(Offset) should be
+ * set to zero for “internally
+ * calibrated” devices. */
+ SFF_8472_T_SLOPE = 84, /* T (Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * internal module temperature. Bit
+ * 7 of byte 84 is MSB, bit 0 of
+ * byte 85 is LSB. T(Slope) should
+ * be set to 1 for “internally
+ * calibrated” devices. */
+ SFF_8472_T_OFFSET = 86, /* T (Offset) Fixed decimal (signed
+ * two’s complement) calibration
+ * data, internal module
+ * temperature. Bit 7 of byte 86 is
+ * MSB, bit 0 of byte 87 is LSB.
+ * T(Offset) should be set to zero
+ * for “internally calibrated”
+ * devices. */
+ SFF_8472_V_SLOPE = 88, /* V (Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * internal module supply
+ * voltage. Bit 7 of byte 88 is
+ * MSB, bit 0 of byte 89 is
+ * LSB. V(Slope) should be set to 1
+ * for “internally calibrated”
+ * devices. */
+ SFF_8472_V_OFFSET = 90, /* V (Offset) Fixed decimal (signed
+ * two’s complement) calibration
+ * data, internal module supply
+ * voltage. Bit 7 of byte 90 is
+ * MSB. Bit 0 of byte 91 is
+ * LSB. V(Offset) should be set to
+ * zero for “internally calibrated”
+ * devices. */
+ SFF_8472_CHECKSUM = 95, /* Checksum Byte 95 contains the
+ * low order 8 bits of the sum of
+ * bytes 0 – 94. */
+ /* Internal measurements. */
+
+ SFF_8472_TEMP = 96, /* Internally measured module temperature. */
+ SFF_8472_VCC = 98, /* Internally measured supply
+ * voltage in transceiver.
+ */
+ SFF_8472_TX_BIAS = 100, /* Internally measured TX Bias Current. */
+ SFF_8472_TX_POWER = 102, /* Measured TX output power. */
+ SFF_8472_RX_POWER = 104, /* Measured RX input power. */
+
+ SFF_8472_STATUS = 110 /* See below */
+};
+ /* Status Bits Described */
+
+/*
+ * TX Disable State Digital state of the TX Disable Input Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_DISABLE (1 << 7)
+
+/*
+ * Select Read/write bit that allows software disable of
+ * laser. Writing ‘1’ disables laser. See Table 3.11 for
+ * enable/disable timing requirements. This bit is “OR”d with the hard
+ * TX_DISABLE pin value. Note, per SFP MSA TX_DISABLE pin is default
+ * enabled unless pulled low by hardware. If Soft TX Disable is not
+ * implemented, the transceiver ignores the value of this bit. Default
+ * power up value is zero/low.
+ */
+#define SFF_8472_STATUS_SOFT_TX_DISABLE (1 << 6)
+
+/*
+ * RS(1) State Digital state of SFP input pin AS(1) per SFF-8079 or
+ * RS(1) per SFF-8431. Updated within 100ms of change on pin. See A2h
+ * Byte 118, Bit 3 for Soft RS(1) Select control information.
+ */
+#define SFF_8472_RS_STATE (1 << 5)
+
+/*
+ * Rate_Select State [aka. “RS(0)”] Digital state of the SFP
+ * Rate_Select Input Pin. Updated within 100ms of change on pin. Note:
+ * This pin is also known as AS(0) in SFF-8079 and RS(0) in SFF-8431.
+ */
+#define SFF_8472_STATUS_SELECT_STATE (1 << 4)
+
+/*
+ * Read/write bit that allows software rate select control. Writing
+ * ‘1’ selects full bandwidth operation. This bit is “OR’d with the
+ * hard Rate_Select, AS(0) or RS(0) pin value. See Table 3.11 for
+ * timing requirements. Default at power up is logic zero/low. If Soft
+ * Rate Select is not implemented, the transceiver ignores the value
+ * of this bit. Note: Specific transceiver behaviors of this bit are
+ * identified in Table 3.6a and referenced documents. See Table 3.18a,
+ * byte 118, bit 3 for Soft RS(1) Select.
+ */
+#define SFF_8472_STATUS_SOFT_RATE_SELECT (1 << 3)
+
+/*
+ * TX Fault State Digital state of the TX Fault Output Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_FAULT_STATE (1 << 2)
+
+/*
+ * Digital state of the RX_LOS Output Pin. Updated within 100ms of
+ * change on pin.
+ */
+#define SFF_8472_STATUS_RX_LOS (1 << 1)
+
+/*
+ * Indicates transceiver has achieved power up and data is ready. Bit
+ * remains high until data is ready to be read at which time the
+ * device sets the bit low.
+ */
+#define SFF_8472_STATUS_DATA_READY (1 << 0)
+
+/*
+ * Table 3.2 Identifier values.
+ * Identifier constants has taken from SFF-8024 rev 2.9 table 4.1
+ * (as referenced by table 3.2 footer)
+ */
+enum {
+ SFF_8024_ID_UNKNOWN = 0x0, /* Unknown or unspecified */
+ SFF_8024_ID_GBIC = 0x1, /* GBIC */
+ SFF_8024_ID_SFF = 0x2, /* Module soldered to motherboard (ex: SFF)*/
+ SFF_8024_ID_SFP = 0x3, /* SFP or SFP “Plus” */
+ SFF_8024_ID_XBI = 0x4, /* 300 pin XBI */
+ SFF_8024_ID_XENPAK = 0x5, /* Xenpak */
+ SFF_8024_ID_XFP = 0x6, /* XFP */
+ SFF_8024_ID_XFF = 0x7, /* XFF */
+ SFF_8024_ID_XFPE = 0x8, /* XFP-E */
+ SFF_8024_ID_XPAK = 0x9, /* XPAK */
+ SFF_8024_ID_X2 = 0xA, /* X2 */
+ SFF_8024_ID_DWDM_SFP = 0xB, /* DWDM-SFP */
+ SFF_8024_ID_QSFP = 0xC, /* QSFP */
+ SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ */
+ SFF_8024_ID_CXP = 0xE, /* CXP */
+ SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
+ SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
+ SFF_8024_ID_QSFP28 = 0x11, /* QSFP28 */
+ SFF_8024_ID_CXP2 = 0x12, /* CXP2 (aka CXP28) */
+ SFF_8024_ID_CDFP = 0x13, /* CDFP (Style 1/Style 2) */
+ SFF_8024_ID_SMM4 = 0x14, /* Shielded Mini Multilane HD 4X Fanout */
+ SFF_8024_ID_SMM8 = 0x15, /* Shielded Mini Multilane HD 8X Fanout */
+ SFF_8024_ID_CDFP3 = 0x16, /* CDFP (Style3) */
+ SFF_8024_ID_LAST = SFF_8024_ID_CDFP3
+ };
+
+static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown",
+ "GBIC",
+ "SFF",
+ "SFP/SFP+/SFP28",
+ "XBI",
+ "Xenpak",
+ "XFP",
+ "XFF",
+ "XFP-E",
+ "XPAK",
+ "X2",
+ "DWDM-SFP/SFP+",
+ "QSFP",
+ "QSFP+",
+ "CXP",
+ "HD4X",
+ "HD8X",
+ "QSFP28",
+ "CXP2",
+ "CDFP",
+ "SMM4",
+ "SMM8",
+ "CDFP3"};
+
+/* Keep compatibility with old definitions */
+#define SFF_8472_ID_UNKNOWN SFF_8024_ID_UNKNOWN
+#define SFF_8472_ID_GBIC SFF_8024_ID_GBIC
+#define SFF_8472_ID_SFF SFF_8024_ID_SFF
+#define SFF_8472_ID_SFP SFF_8024_ID_SFP
+#define SFF_8472_ID_XBI SFF_8024_ID_XBI
+#define SFF_8472_ID_XENPAK SFF_8024_ID_XENPAK
+#define SFF_8472_ID_XFP SFF_8024_ID_XFP
+#define SFF_8472_ID_XFF SFF_8024_ID_XFF
+#define SFF_8472_ID_XFPE SFF_8024_ID_XFPE
+#define SFF_8472_ID_XPAK SFF_8024_ID_XPAK
+#define SFF_8472_ID_X2 SFF_8024_ID_X2
+#define SFF_8472_ID_DWDM_SFP SFF_8024_ID_DWDM_SFP
+#define SFF_8472_ID_QSFP SFF_8024_ID_QSFP
+#define SFF_8472_ID_LAST SFF_8024_ID_LAST
+
+#define sff_8472_id sff_8024_id
+
+/*
+ * Table 3.9 Diagnostic Monitoring Type (byte 92)
+ * bits described.
+ */
+
+/*
+ * Digital diagnostic monitoring implemented.
+ * Set to 1 for transceivers implementing DDM.
+ */
+#define SFF_8472_DDM_DONE (1 << 6)
+
+/*
+ * Measurements are internally calibrated.
+ */
+#define SFF_8472_DDM_INTERNAL (1 << 5)
+
+/*
+ * Measurements are externally calibrated.
+ */
+#define SFF_8472_DDM_EXTERNAL (1 << 4)
+
+/*
+ * Received power measurement type
+ * 0 = OMA, 1 = average power
+ */
+#define SFF_8472_DDM_PMTYPE (1 << 3)
+
+/* Table 3.13 and 3.14 Temperature Conversion Values */
+#define SFF_8472_TEMP_SIGN (1 << 15)
+#define SFF_8472_TEMP_SHIFT 8
+#define SFF_8472_TEMP_MSK 0xEF00
+#define SFF_8472_TEMP_FRAC 0x00FF
+
+/* Internal Calibration Conversion factors */
+
+/*
+ * Represented as a 16 bit unsigned integer with the voltage defined
+ * as the full 16 bit value (0 – 65535) with LSB equal to 100 uVolt,
+ * yielding a total range of 0 to +6.55 Volts.
+ */
+#define SFF_8472_VCC_FACTOR 10000.0
+
+/*
+ * Represented as a 16 bit unsigned integer with the current defined
+ * as the full 16 bit value (0 – 65535) with LSB equal to 2 uA,
+ * yielding a total range of 0 to 131 mA.
+ */
+
+#define SFF_8472_BIAS_FACTOR 2000.0
+
+/*
+ * Represented as a 16 bit unsigned integer with the power defined as
+ * the full 16 bit value (0 – 65535) with LSB equal to 0.1 uW,
+ * yielding a total range of 0 to 6.5535 mW (~ -40 to +8.2 dBm).
+ */
+
+#define SFF_8472_POWER_FACTOR 10000.0
Property changes on: trunk/sys/net/sff8472.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/net/slcompress.c
===================================================================
--- trunk/sys/net/slcompress.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/slcompress.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)slcompress.c 8.2 (Berkeley) 4/16/94
- * $FreeBSD: stable/9/sys/net/slcompress.c 128019 2004-04-07 20:46:16Z imp $
+ * $FreeBSD: stable/10/sys/net/slcompress.c 128019 2004-04-07 20:46:16Z imp $
*/
/*
Modified: trunk/sys/net/slcompress.h
===================================================================
--- trunk/sys/net/slcompress.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/slcompress.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -32,7 +32,7 @@
*
* Van Jacobson (van at helios.ee.lbl.gov), Dec 31, 1989:
* - Initial distribution.
- * $FreeBSD: stable/9/sys/net/slcompress.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/net/slcompress.h 139823 2005-01-07 01:45:51Z imp $
*/
#ifndef _NET_SLCOMPRESS_H_
Modified: trunk/sys/net/vnet.c
===================================================================
--- trunk/sys/net/vnet.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/vnet.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/net/vnet.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/net/vnet.c 262734 2014-03-04 14:01:12Z glebius $");
#include "opt_ddb.h"
#include "opt_kdb.h"
@@ -211,14 +211,14 @@
static struct sx vnet_data_free_lock;
SDT_PROVIDER_DEFINE(vnet);
-SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, entry, "int");
-SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, alloc, alloc, "int",
+SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, "int");
+SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, alloc, "int",
"struct vnet *");
-SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return, return,
+SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return,
"int", "struct vnet *");
-SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry, entry,
+SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry,
"int", "struct vnet *");
-SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return, entry,
+SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return,
"int");
#ifdef DDB
@@ -466,47 +466,6 @@
}
/*
- * Variants on sysctl_handle_foo that know how to handle virtualized global
- * variables: if 'arg1' is a pointer, then we transform it to the local vnet
- * offset.
- */
-int
-vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS)
-{
-
- if (arg1 != NULL)
- arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
- return (sysctl_handle_int(oidp, arg1, arg2, req));
-}
-
-int
-vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
-{
-
- if (arg1 != NULL)
- arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
- return (sysctl_handle_opaque(oidp, arg1, arg2, req));
-}
-
-int
-vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS)
-{
-
- if (arg1 != NULL)
- arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
- return (sysctl_handle_string(oidp, arg1, arg2, req));
-}
-
-int
-vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS)
-{
-
- if (arg1 != NULL)
- arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
- return (sysctl_handle_int(oidp, arg1, arg2, req));
-}
-
-/*
* Support for special SYSINIT handlers registered via VNET_SYSINIT()
* and VNET_SYSUNINIT().
*/
Modified: trunk/sys/net/vnet.h
===================================================================
--- trunk/sys/net/vnet.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/vnet.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -33,7 +33,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/net/vnet.h 218567 2011-02-11 14:17:58Z bz $
+ * $FreeBSD: stable/10/sys/net/vnet.h 262735 2014-03-04 14:05:37Z glebius $
*/
/*-
@@ -86,6 +86,56 @@
#ifdef _KERNEL
+#define VNET_PCPUSTAT_DECLARE(type, name) \
+ VNET_DECLARE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_DEFINE(type, name) \
+ VNET_DEFINE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_ALLOC(name, wait) \
+ COUNTER_ARRAY_ALLOC(VNET(name), \
+ sizeof(VNET(name)) / sizeof(counter_u64_t), (wait))
+
+#define VNET_PCPUSTAT_FREE(name) \
+ COUNTER_ARRAY_FREE(VNET(name), sizeof(VNET(name)) / sizeof(counter_u64_t))
+
+#define VNET_PCPUSTAT_ADD(type, name, f, v) \
+ counter_u64_add(VNET(name)[offsetof(type, f) / sizeof(uint64_t)], (v))
+
+#define VNET_PCPUSTAT_SYSINIT(name) \
+static void \
+vnet_##name##_init(const void *unused) \
+{ \
+ VNET_PCPUSTAT_ALLOC(name, M_WAITOK); \
+} \
+VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_PROTO_IFATTACHDOMAIN, \
+ SI_ORDER_ANY, vnet_ ## name ## _init, NULL)
+
+#define VNET_PCPUSTAT_SYSUNINIT(name) \
+static void \
+vnet_##name##_uninit(const void *unused) \
+{ \
+ VNET_PCPUSTAT_FREE(name); \
+} \
+VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_PROTO_IFATTACHDOMAIN, \
+ SI_ORDER_ANY, vnet_ ## name ## _uninit, NULL)
+
+#define SYSCTL_VNET_PCPUSTAT(parent, nbr, name, type, array, desc) \
+static int \
+array##_sysctl(SYSCTL_HANDLER_ARGS) \
+{ \
+ type s; \
+ CTASSERT((sizeof(type) / sizeof(uint64_t)) == \
+ (sizeof(VNET(array)) / sizeof(counter_u64_t))); \
+ COUNTER_ARRAY_COPY(VNET(array), &s, sizeof(type) / sizeof(uint64_t));\
+ if (req->newptr) \
+ COUNTER_ARRAY_ZERO(VNET(array), \
+ sizeof(type) / sizeof(uint64_t)); \
+ return (SYSCTL_OUT(req, &s, sizeof(type))); \
+} \
+SYSCTL_VNET_PROC(parent, nbr, name, CTLTYPE_OPAQUE | CTLFLAG_RW, NULL, \
+ 0, array ## _sysctl, "I", desc)
+
#ifdef VIMAGE
#include <sys/lock.h>
#include <sys/proc.h> /* for struct thread */
@@ -241,15 +291,10 @@
* arguments themselves, if required.
*/
#ifdef SYSCTL_OID
-int vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS);
-
#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \
SYSCTL_OID(parent, nbr, name, \
CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
- ptr, val, vnet_sysctl_handle_int, "I", descr)
+ ptr, val, sysctl_handle_int, "I", descr)
#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
fmt, descr) \
CTASSERT(((access) & CTLTYPE) != 0); \
@@ -259,20 +304,20 @@
descr) \
SYSCTL_OID(parent, nbr, name, \
CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, len, \
- vnet_sysctl_handle_opaque, fmt, descr)
+ sysctl_handle_opaque, fmt, descr)
#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \
SYSCTL_OID(parent, nbr, name, \
CTLTYPE_STRING|CTLFLAG_VNET|(access), \
- arg, len, vnet_sysctl_handle_string, "A", descr)
+ arg, len, sysctl_handle_string, "A", descr)
#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \
SYSCTL_OID(parent, nbr, name, \
CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, \
- sizeof(struct type), vnet_sysctl_handle_opaque, "S," #type, \
+ sizeof(struct type), sysctl_handle_opaque, "S," #type, \
descr)
#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \
SYSCTL_OID(parent, nbr, name, \
CTLTYPE_UINT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
- ptr, val, vnet_sysctl_handle_uint, "IU", descr)
+ ptr, val, sysctl_handle_int, "IU", descr)
#define VNET_SYSCTL_ARG(req, arg1) do { \
if (arg1 != NULL) \
arg1 = (void *)(TD_TO_VNET((req)->td)->vnet_data_base + \
Modified: trunk/sys/net/zlib.c
===================================================================
--- trunk/sys/net/zlib.c 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/zlib.c 2018-05-25 20:05:59 UTC (rev 9938)
@@ -11,7 +11,7 @@
* - added inflateIncomp and deflateOutputPending
* - allow strm->next_out to be NULL, meaning discard the output
*
- * $FreeBSD: stable/9/sys/net/zlib.c 149993 2005-09-11 16:13:02Z rodrigc $
+ * $FreeBSD: stable/10/sys/net/zlib.c 245102 2013-01-06 14:59:59Z peter $
*/
/*
@@ -26,7 +26,14 @@
#define MY_ZCALLOC
#if defined(__FreeBSD__) && defined(_KERNEL)
-#define inflate inflate_ppp /* FreeBSD already has an inflate :-( */
+#define _tr_init _zlib104_tr_init
+#define _tr_align _zlib104_tr_align
+#define _tr_tally _zlib104_tr_tally
+#define _tr_flush_block _zlib104_tr_flush_block
+#define _tr_stored_block _zlib104_tr_stored_block
+#define inflate_fast _zlib104_inflate_fast
+#define inflate _zlib104_inflate
+#define zlibVersion _zlib104_Version
#endif
Modified: trunk/sys/net/zlib.h
===================================================================
--- trunk/sys/net/zlib.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/zlib.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/net/zlib.h 204552 2010-03-02 06:58:58Z alfred $ */
+/* $FreeBSD: stable/10/sys/net/zlib.h 245102 2013-01-06 14:59:59Z peter $ */
/*
* This file is derived from zlib.h and zconf.h from the zlib-1.0.4
@@ -110,7 +110,7 @@
#if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32)
# define WIN32
#endif
-#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(i386)
+#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(__i386__)
# ifndef __32BIT__
# define __32BIT__
# endif
@@ -512,7 +512,7 @@
*/
#if defined(__FreeBSD__) && defined(_KERNEL)
-#define inflate inflate_ppp /* FreeBSD already has an inflate :-( */
+#define inflate _zlib104_inflate /* FreeBSD already has an inflate :-( */
#endif
extern int EXPORT inflate OF((z_streamp strm, int flush));
Modified: trunk/sys/net/zutil.h
===================================================================
--- trunk/sys/net/zutil.h 2018-05-25 20:04:31 UTC (rev 9937)
+++ trunk/sys/net/zutil.h 2018-05-25 20:05:59 UTC (rev 9938)
@@ -10,7 +10,7 @@
*/
/* From: zutil.h,v 1.16 1996/07/24 13:41:13 me Exp $ */
-/* $FreeBSD: stable/9/sys/net/zutil.h 204552 2010-03-02 06:58:58Z alfred $ */
+/* $FreeBSD: stable/10/sys/net/zutil.h 204552 2010-03-02 06:58:58Z alfred $ */
#ifndef _Z_UTIL_H
#define _Z_UTIL_H
More information about the Midnightbsd-cvs
mailing list